% Bibliography database: publications dated 2014 (UTF-8 encoded).
% BibTeX ignores text outside entries, so these percent lines act as comments.
%
% Conventions in this section:
%   - lowercase entry types and field names, one field per line;
%   - names are stored as "Last, First"; compound surnames stay before the comma;
%   - proper nouns and acronyms in titles are brace-protected so that
%     sentence-casing styles do not lowercase them;
%   - venues cited by several entries are defined once as string macros;
%   - LaTeX specials in ordinary text fields are escaped (ampersand as \&).

% Shared venue and publisher names (referenced unquoted in the entries below).
@string{lrec2014 = {Proceedings of the 9th edition of the Language Resources and Evaluation Conference ({LREC} 2014), Reykjavik, Iceland, May 26--31, 2014}}
@string{sltc2014 = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13--14 November 2014}}
@string{elra = {European Language Resources Association}}

@inproceedings{borin-etal-2014-bring-198549,
  author    = {Borin, Lars and Allwood, Jens and de Melo, Gerard},
  title     = {Bring vs. {MTRoget}: Evaluating automatic thesaurus translation},
  booktitle = lrec2014,
  year      = {2014},
  publisher = elra,
  isbn      = {978-2-9517408-8-4},
}

@article{forsberg-etal-2014-from-208123,
  author   = {Forsberg, Markus and Johansson, Richard and Bäckström, Linnéa and Borin, Lars and Lyngfelt, Benjamin and Olofsson, Joel and Prentice, Julia},
  title    = {From construction candidates to constructicon entries: An experiment using semi-automatic methods for identifying constructions in corpora},
  journal  = {Constructions and Frames},
  year     = {2014},
  volume   = {6},
  number   = {1},
  pages    = {114--135},
  abstract = {We present an experiment where natural language processing tools are used to automatically identify potential constructions in a corpus. The experiment was conducted as part of the ongoing efforts to develop a Swedish constructicon. Using an automatic method to suggest constructions has advantages not only for efficiency but also methodologically: it forces the analyst to look more objectively at the constructions actually occurring in corpora, as opposed to focusing on “interesting” constructions only. As a heuristic for identifying potential constructions, the method has proved successful, yielding about 200 (out of 1,200) highly relevant construction candidates.},
}

% NOTE(review): in the next entry, volume repeats the year and number looks
% like a front-matter page count rather than a series number; classic styles
% also warn when an incollection sets both. Confirm the real series volume.
@incollection{ribeck-borin-2014-lexical-201965,
  author    = {Ribeck, Judy Carola and Borin, Lars},
  title     = {Lexical Bundles in {Swedish} Secondary School Textbooks},
  booktitle = {Human Language Technology Challenges for Computer Science and Linguistics: 5th Language and Technology Conference, {LTC} 2011, Poznań, Poland, November 25--27, 2011, Revised Selected Papers},
  editor    = {Vetulani, Zygmunt and Mariani, Joseph},
  year      = {2014},
  volume    = {2014},
  number    = {XVI},
  pages     = {238--249},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-319-08958-4},
  abstract  = {The present paper describes the process of identifying lexical bundles, i.e., frequently recurring word sequences such as by means of and in the end of, in secondary school history and physics textbooks. In its determination of finding genuine lexical bundles, i.e. the word boundaries between lexical bundles and surrounding arbitrary words, it proposes a new approach to come to terms with the problem of extracting overlapping bundles of different lengths. The results of the structural classification indicate that history uses more NP/PP-based and less dependent-clause-based bundles than physics. The comparative analysis manages to restrict this difference to the referential function. History almost only refers to phrases, i.e. within clauses, while physics much more tends to make references across clauses. The article also includes a report on an extension of the study, ongoing work where the automatic identification of multi-word expressions in general is in focus.},
}

@inproceedings{ahlberg-etal-2014-swedish-210083,
  author    = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
  title     = {Swedish {FrameNet++} The Beginning of the End and the End of the Beginning},
  booktitle = sltc2014,
  year      = {2014},
}

@inproceedings{kokkinakis-etal-2014-hfst-209800,
  author    = {Kokkinakis, Dimitrios and Niemi, Jyrki and Hardwick, Sam and Lindén, Krister and Borin, Lars},
  title     = {{HFST-SweNER} -- A New {NER} Resource for {Swedish}},
  booktitle = lrec2014,
  year      = {2014},
  pages     = {2537--2543},
  publisher = elra,
  isbn      = {978-2-9517408-8-4},
  abstract  = {Named entity recognition (NER) is a knowledge-intensive information extraction task that is used for recognizing textual mentions of entities that belong to a predefined set of categories, such as locations, organizations and time expressions. NER is a challenging, difficult, yet essential preprocessing technology for many natural language processing applications, and particularly crucial for language understanding. NER has been actively explored in academia and in industry especially during the last years due to the advent of social media data. This paper describes the conversion, modeling and adaptation of a Swedish NER system from a hybrid environment, with integrated functionality from various processing components, to the Helsinki Finite-State Transducer Technology (HFST) platform. This new HFST-based NER (HFST-SweNER) is a full-fledged open source implementation that supports a variety of generic named entity types and consists of multiple, reusable resource layers, e.g., various n-gram-based named entity lists (gazetteers).},
}

@inproceedings{adesam-etal-2014-koala-211376,
  author    = {Adesam, Yvonne and Borin, Lars and Bouma, Gerlof and Forsberg, Markus and Johansson, Richard},
  title     = {{Koala} – {Korp’s} Linguistic Annotations Developing an infrastructure for text-based research with high-quality annotations},
  booktitle = sltc2014,
  year      = {2014},
}

@article{borin-etal-2014-geographic-198286,
  author   = {Borin, Lars and Dannélls, Dana and Olsson, Leif-Jöran},
  title    = {Geographic visualization of place names in {Swedish} literary texts},
  journal  = {Literary \& Linguistic Computing},
  year     = {2014},
  volume   = {29},
  number   = {3},
  pages    = {400--404},
  abstract = {This article describes the development of a geographical information system (GIS) at Språkbanken as part of a visualization solution to be used in an archive of historical Swedish literary texts. The research problems we are aiming to address concern orthographic and morphological variation, missing place names, and missing place name coordinates. Some of these problems form a central part in the development of methods and tools for the automatic analysis of historical Swedish literary texts at our research unit. We discuss the advantages and challenges of covering large-scale spelling variation in place names from different sources and in generating maps with focus on different time periods.},
}

@inproceedings{lyngfelt-etal-2014-svenskt-208457,
  author    = {Lyngfelt, Benjamin and Borin, Lars and Bäckström, Linnéa and Forsberg, Markus and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia and Uppström, Jonatan},
  title     = {Ett svenskt konstruktikon. {Grammatik} möter lexikon},
  booktitle = {Svenskans beskrivning: Förhandlingar vid Trettiotredje sammankomsten för svenskans beskrivning. Helsingfors den 15–17 maj 2013},
  year      = {2014},
  volume    = {33},
  pages     = {268--279},
  isbn      = {978-951-51-0120-4},
}

@article{borin-etal-2014-introduction-202127,
  author  = {Borin, Lars and de Melo, Gerard and Friberg Heppin, Karin and Torrent, Tiago Timponi},
  title   = {Introduction: Constructions and frames meet language technology},
  journal = {Constructions and Frames},
  year    = {2014},
  volume  = {6},
  number  = {1},
  pages   = {1--8},
}

% An edited proceedings volume: stored with the standard proceedings type
% (the earlier edited_book type is not defined by standard styles).
@proceedings{volodina-etal-2014-proceedings-206135,
  editor    = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
  title     = {Proceedings of the third workshop on {NLP} for computer-assisted language learning at {SLTC} 2014, {Uppsala University}},
  year      = {2014},
  publisher = {Linköping University Press},
  address   = {Linköping},
  isbn      = {978-91-7519-175-1},
  abstract  = {The workshop series on NLP for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The papers in the proceedings volume from the third NLP4CALL workshop cover three main topic areas: resources for development of ICALL applications (e.g., learner corpora and coursebook corpora), tools and algorithms for the analysis of learner language (e.g., focusing on collocations, reading tasks, cloze items, pronunciation, spelling, level classification of learner production), and the generation of learning materials (e.g., exercise generators).},
}

@inproceedings{borin-etal-2014-representing-204731,
  author    = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and McCrae, John P.},
  title     = {Representing {Swedish} Lexical Resources in {RDF} with lemon},
  booktitle = {Proceedings of the {ISWC} 2014 Posters \& Demonstrations Track, a track within the 13th International Semantic Web Conference ({ISWC} 2014)},
  year      = {2014},
  volume    = {1272},
  pages     = {329--332},
  abstract  = {The paper presents an ongoing project which aims to publish Swedish lexical-semantic resources using Semantic Web and Linked Data technologies. In this article, we highlight the practical conversion methods and challenges of converting three of the Swedish language resources in RDF with lemon.},
}

@inproceedings{volodina-etal-2014-flexible-201885,
  author    = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Lindström Tiedemann, Therese},
  title     = {A flexible language learning platform based on language resources and web services},
  booktitle = lrec2014,
  year      = {2014},
  pages     = {3973--3978},
  publisher = elra,
  isbn      = {978-2-9517408-8-4},
  abstract  = {We present Lärka, the language learning platform of Språkbanken (the Swedish Language Bank). It consists of an exercise generator which reuses resources available through Språkbanken: mainly Korp, the corpus infrastructure, and Karp, the lexical infrastructure. Through Lärka we reach new user groups – students and teachers of Linguistics as well as second language learners and their teachers – and this way bring Språkbanken's resources in a relevant format to them. Lärka can therefore be viewed as a case of a real-life language resource evaluation with end users. In this article we describe Lärka's architecture, its user interface, and the five exercise types that have been released for users so far. The first user evaluation following in-class usage with students of linguistics, speech therapy and teacher candidates are presented. The outline of future work concludes the paper.},
}

@inproceedings{rehm-etal-2014-strategic-198556,
  author    = {Rehm, Georg and Uszkoreit, Hans and Ananiadou, Sophia and Bel, Núria and Bieleviciene, Audrone and Borin, Lars and Branco, António and Budin, Gerhard and Calzolari, Nicoletta and Daelemans, Walter and Garabík, Radovan and Grobelnik, Marko and Garcia-Mateo, Carmen and van Genabith, Josef and Hajic, Jan and Hernaez, Inma and Judge, John and Koeva, Svetla and Krek, Simon and Krstev, Cvetana and Lindén, Krister and Magnini, Bernardo and Mariani, Joseph and McNaught, John and Melero, Maite and Monachini, Monica and Moreno, Asuncion and Odijk, Jan and Ogrodniczuk, Maciej and Pezik, Piotr and Piperidis, Stelios and Przepiórkowski, Adam and Rögnvaldsson, Eiríkur and Rosner, Michael and Pedersen, Bolette Sandford and Skadina, Inguna and De Smedt, Koenraad and Tadić, Marko and Thompson, Paul and Tufiș, Dan and Váradi, Tamás and Vasiljevs, Andrejs and Vider, Kadri and Zabarskaite, Jolanta},
  title     = {The strategic impact of {META-NET} on the regional, national and international level},
  booktitle = lrec2014,
  year      = {2014},
  pages     = {1517--1524},
  publisher = elra,
  isbn      = {978-2-9517408-8-4},
}

@inproceedings{borin-etal-2014-linguistic-198551,
  author    = {Borin, Lars and Saxena, Anju and Rama, Taraka and Comrie, Bernard},
  title     = {Linguistic landscaping of {South Asia} using digital language resources: Genetic vs. areal linguistics},
  booktitle = lrec2014,
  year      = {2014},
  pages     = {3137--3144},
  publisher = elra,
  isbn      = {978-2-9517408-8-4},
}

% NOTE(review): the key of the next entry contains a semicolon. It is kept
% unchanged so existing citations still resolve, but some toolchains may
% mishandle it; consider an alias if the target setup supports one.
@inproceedings{borin-forsberg-2014-swesaurus;-193085,
  author    = {Borin, Lars and Forsberg, Markus},
  title     = {Swesaurus; or, The {Frankenstein} Approach to Wordnet Construction},
  booktitle = {Proceedings of the Seventh Global WordNet Conference ({GWC} 2014)},
  year      = {2014},
  isbn      = {978-9949-32-492-7},
  abstract  = {Swesaurus is a freely available (under a CC-BY license) Swedish wordnet under construction, built primarily by scavenging and recycling information from a number of existing lexical resources. Among its more unusual characteristics are graded lexical-semantic relations and inclusion of all parts of speech, not only open-class items.},
}

@article{rama-borin-2014-gram-187121,
  author  = {Rama, Taraka and Borin, Lars},
  title   = {N-Gram Approaches to the Historical Dynamics of Basic Vocabulary},
  journal = {Journal of Quantitative Linguistics},
  year    = {2014},
  volume  = {21},
  number  = {1},
  pages   = {50--64},
}

% NOTE(review): the journal value of the next entry reads like a book title
% and the entry has no volume, number or pages; it may really be a chapter in
% an edited volume. Left as an article until publisher/editor data is confirmed.
@article{borin-johansson-2014-kulturomik-192931,
  author  = {Borin, Lars and Johansson, Richard},
  title   = {Kulturomik: Att spana efter språkliga och kulturella förändringar i digitala textarkiv},
  journal = {Historia i en digital värld},
  year    = {2014},
}