Hoppa till huvudinnehåll

BibTeX

@inProceedings{borin-etal-2012-korp-156080,
	title        = {Korp – the corpus infrastructure of {Språkbanken}},
	abstract     = {We present Korp, the corpus infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure consists of three main components: the Korp corpus pipeline, the Korp backend, and the Korp frontend. The Korp corpus pipeline is used for importing corpora, annotating them, and then exporting the annotated corpora into different formats. An essential feature of the pipeline is the ability to leave existing annotations untouched, both structural and word level annotations, and to use the existing annotations as the foundation of other annotations. The Korp backend consists of a set of REST-based web services for searching in and retrieving information about the corpora. Finally, the Korp frontend is a graphical search interface that interacts with the Korp backend. The interface has been inspired by corpus search interfaces such as SketchEngine, Glossa, and DeepDict, and it uses State Chart XML (SCXML) in order to enable users to bookmark interaction states. We give a functional and technical overview of the three components, followed by a discussion of planned future work.},
	booktitle    = {Proceedings of {LREC} 2012},
	author       = {Borin, Lars and Forsberg, Markus and Roxendal, Johan},
	year         = {2012},
	publisher    = {ELRA},
	address      = {Istanbul},
	pages        = {474--478},
	internal-note = {Source export carried the publication status "Accepted" in the volume field; moved here because it is not a volume number},
}

@inProceedings{rama-borin-2012-properties-164449,
	title        = {Properties of phoneme {N-grams} across the world’s language families},
	abstract     = {In this article, we investigate the properties of phoneme N-grams across half of the world’s languages. The sizes of three different N-gram distributions of the world’s language families obey a power law. Further, the N-gram distributions of language families parallel the sizes of the families, which also follow a power law distribution. The correlation between N-gram distributions and language family sizes improves with increasing values of N. The study also raises some new questions about the use of N-gram distributions in linguistic research, which we hope to be able to investigate in the future.},
	booktitle    = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2012},
}

@book{borin-etal-2012-svenska-163410,
	author       = {Borin, Lars and Brandt, Martha and Edlund, Jens and Lindh, Jonas and Parkvall, Mikael},
	title        = {Svenska språket i den digitala tidsåldern},
	publisher    = {Springer},
	address      = {Berlin},
	year         = {2012},
	isbn         = {978-3-642-30831-4},
}

@inProceedings{borin-etal-2012-open-156079,
	title        = {The open lexical infrastructure of {Språkbanken}},
	abstract     = {We present our ongoing work on Karp, Språkbanken’s (the Swedish Language Bank) open lexical infrastructure, which has two main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; and (2) to publish daily versions of the resources, making them searchable and downloadable. An important requirement on the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 15 Swedish lexical resources, including historical ones, some of which have been created from scratch using existing free resources, both external and in-house. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish. SALDO has been selected as the pivot partly because of its size and quality, but also because its form and sense units have been assigned persistent identifiers (PIDs) to which the lexical information in other lexical resources and in corpora are linked.},
	booktitle    = {Proceedings of the 8th International Conference on Language Resources and Evaluation, May 23--25, 2012},
	editor       = {Calzolari, Nicoletta and others},
	author       = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Uppström, Jonatan},
	year         = {2012},
	ISBN         = {978-2-9517408-7-7},
	pages        = {3598--3602},
}

@inProceedings{borin-etal-2012-growing-171988,
	title        = {Growing a {Swedish} constructicon in lexical soil},
	booktitle    = {Proceedings of the Swedish Language Technology Conference. Lund, October 24--26, 2012},
	author       = {Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia},
	year         = {2012},
	pages        = {10--11},
}

@inProceedings{lyngfelt-etal-2012-adding-163582,
	title        = {Adding a constructicon to the {Swedish} resource network of {Språkbanken}},
	abstract     = {This paper presents the integrated Swedish resource network of Språkbanken in general, and its latest addition – a constructicon – in particular. The constructicon, which is still in its early stages, is a collection of (partially) schematic multi-word units, constructions, developed as an addition to the Swedish FrameNet (SweFN). SweFN and the constructicon are integrated with other parts of Språkbanken, both lexical resources and corpora, through the lexical resource SALDO. In most respects, the constructicon is modeled on its English counterpart in Berkeley, and, thus, following the FrameNet format. The most striking differences are the inclusion of so-called collostructional elements and the treatment of semantic roles, which are defined globally instead of locally as in FrameNet. Incorporating subprojects such as developing methods for automatic identification of constructions in authentic text on the one hand, and accounting for constructions problematic for L2 acquisition on the other, the approach is highly cross-disciplinary in nature, combining various theoretical linguistic perspectives on construction grammar with language technology, lexicography, and L2 research.},
	booktitle    = {11th Conference on Natural Language Processing (KONVENS) Proceedings},
	author       = {Lyngfelt, Benjamin and Borin, Lars and Forsberg, Markus and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia},
	year         = {2012},
	ISBN         = {3-85027-005-X},
	pages        = {452--461},
}

@inProceedings{borin-etal-2012-transferring-157213,
	title        = {Transferring Frames: Utilization of Linked Lexical Resources},
	abstract     = {In our experiment, we evaluate the transferability of frames from Swedish to Finnish in parallel corpora. We evaluate both the theoretical possibility of transferring frames and the possibility of performing it using available lexical resources. We add the frame information to an extract of the Swedish side of the Kotus and JRC-Acquis corpora using an automatic frame labeler and copy it to the Finnish side. We focus on evaluating the results to get an estimation on how often the parallel sentences can be said to express the same frame. This sheds light to the questions: Are the same situations in the two languages expressed using different frames, i.e. are the frames transferable even in theory? How well can the frame information of running text be transferred from language to another?},
	booktitle    = {Proceedings of the Workshop on Inducing Linguistic Structure Submission (WILS)},
	author       = {Borin, Lars and Forsberg, Markus and Johansson, Richard and Muhonen, Kristiina and Purtonen, Tanja and Voionmaa, Kaarlo},
	year         = {2012},
	pages        = {8--15},
}

@inProceedings{borin-etal-2012-search-157338,
	title        = {Search Result Diversification Methods to Assist Lexicographers},
	abstract     = {We show how the lexicographic task of finding informative and diverse example sentences can be cast as a search result diversification problem, where an objective based on relevance and diversity is maximized. This problem has been studied intensively in the information retrieval community during recent years, and efficient algorithms have been devised. We finally show how the approach has been implemented in a lexicographic project, and describe the relevance and diversity functions used in that context.},
	booktitle    = {Proceedings of the 6th Linguistic Annotation Workshop},
	author       = {Borin, Lars and Forsberg, Markus and Friberg Heppin, Karin and Johansson, Richard and Kjellandsson, Annika},
	year         = {2012},
	pages        = {113--117},
}

@inProceedings{pedersen-etal-2012-linking-155599,
	title        = {Linking and validating {Nordic} and {Baltic} wordnets},
	booktitle    = {Proceedings of the 6th International Global Wordnet Conference},
	author       = {Pedersen, Bolette Sandford and Borin, Lars and Forsberg, Markus and Lindén, Krister and Orav, Heili and Rögnvaldsson, Eiríkur},
	year         = {2012},
	pages        = {254--260},
	internal-note = {Source export carried the publication status "Accepted" in the volume field; moved here because it is not a volume number},
}

@proceedings{borin-volodina-2012-proceedings-188679,
	title        = {Proceedings of the {SLTC} 2012 workshop on {NLP} for {CALL}},
	editor       = {Borin, Lars and Volodina, Elena},
	year         = {2012},
	publisher    = {LiU Electronic Press},
	address      = {Linköping},
}

@inProceedings{dannells-borin-2012-toward-156502,
	title        = {Toward language independent methodology for generating artwork descriptions – Exploring {FrameNet} information},
	abstract     = {Today museums and other cultural heritage institutions are increasingly storing object descriptions using semantic web domain ontologies. To make this content accessible in a multilingual world, it will need to be conveyed in many languages, a language generation task which is domain specific and language dependent. This paper describes how semantic and syntactic information such as that provided in a framenet can contribute to solving this task. It is argued that the kind of information offered by such lexical resources enhances the output quality of a multilingual language generation application, in particular when generating domain specific content.},
	booktitle    = {EACL 2012 workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH)},
	author       = {Dannélls, Dana and Borin, Lars},
	year         = {2012},
}

@inProceedings{volodina-etal-2012-towards-168516,
	title        = {Towards a system architecture for {ICALL}},
	abstract     = {In this paper, we present an on-going project whose overall aim is to develop open-source system architecture for supporting ICALL systems that will facilitate re-use of existing NLP tools and resources on a plug-and-play basis. We introduce the project, describe the approaches adopted by the two language teams, and present two applications being developed using the proposed architecture.},
	booktitle    = {Proceedings of the 20th International Conference on Computers in Education},
	editor       = {Biswas, G. and others},
	author       = {Volodina, Elena and Loftsson, Hrafn and Arnbjörnsdóttir, Birna and Borin, Lars and Leifsson, Guðmundur Örn},
	year         = {2012},
	volume       = {2012},
	publisher    = {Asia-Pacific Society for Computers in Education},
	address      = {Singapore},
	ISBN         = {978-981-07-4649-0},
}

@book{larsson-borin-2012-from-167661,
	title        = {From Quantification to Conversation},
	editor       = {Larsson, Staffan and Borin, Lars},
	year         = {2012},
	publisher    = {College Publications},
	address      = {London},
	ISBN         = {978-1-84890-091-2},
}

@inProceedings{volodina-borin-2012-developing-168523,
	title        = {Developing an Open-Source Web-Based Exercise Generator for {Swedish}},
	abstract     = {This paper reports on the ongoing international project System architecture for ICALL and the progress made by the Swedish partner. The Swedish team is developing a web-based exercise generator reusing available annotated corpora and lexical resources. Apart from the technical issues like implementation of the user interface and the underlying processing machinery, a number of interesting pedagogical questions need to be solved, e.g., adapting learner-oriented exercises to proficiency levels; selecting authentic examples of an appropriate difficulty level; automatically ranking corpus examples by their quality; providing feedback to the learner, and selecting vocabulary for training domain-specific, academic or general-purpose vocabulary. In this paper we describe what has been done so far, mention the exercise types that can be generated at the moment as well as describe the tasks left for the future.},
	booktitle    = {{CALL}: Using, Learning, Knowing. {EuroCALL} Conference, Gothenburg, Sweden, 22--25 August 2012, Proceedings},
	editor       = {Bradley, Linda and Thouësny, Sylvie},
	author       = {Volodina, Elena and Borin, Lars},
	year         = {2012},
	volume       = {2012},
	publisher    = {Research-publishing.net},
	address      = {Dublin, Ireland},
	ISBN         = {978-1-908416-03-2},
}

@inProceedings{volodina-etal-2012-waste-165936,
	title        = {Waste not, want not: Towards a system architecture for {ICALL} based on {NLP} component re-use},
	booktitle    = {Proceedings of the SLTC 2012 workshop on NLP for CALL, Lund, 25th October, 2012},
	author       = {Volodina, Elena and Borin, Lars and Loftsson, Hrafn and Arnbjörnsdóttir, Birna and Leifsson, Guðmundur Örn},
	year         = {2012},
	pages        = {47--58},
}

@incollection{borin-2012-core-162377,
	title        = {Core vocabulary: A useful but mystical concept in some kinds of linguistics},
	booktitle    = {Shall we play the festschrift game? Essays on the Occasion of Lauri Carlson's 60th Birthday},
	author       = {Borin, Lars},
	year         = {2012},
	publisher    = {Springer},
	address      = {Berlin},
	ISBN         = {978-3-642-30772-0},
	pages        = {53--65},
}