Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

% Journal article on the LivingKnowledge open-source multimedia information-extraction platform.
@article{dupplaw-etal-2014-information-195563,
  author   = {Dupplaw, David and Matthews, Michael and Johansson, Richard and Boato, Giulia and Costanzo, Andrea and Fontani, Marco and Minack, Enrico and Demidova, Elena and Blanco, Roi and Griffiths, Thomas and Lewis, Paul and Hare, Jonathon and Moschitti, Alessandro},
  title    = {Information extraction from multimedia web documents: an open-source platform and testbed},
  journal  = {International Journal of Multimedia Information Retrieval},
  year     = {2014},
  volume   = {3},
  number   = {2},
  pages    = {97--111},
  abstract = {The LivingKnowledge project aimed to enhance the current state of the art in search, retrieval and knowledge management on the web by advancing the use of sentiment and opinion analysis within multimedia applications. To achieve this aim, a diverse set of novel and complementary analysis techniques have been integrated into a single, but extensible software platform on which such applications can be built. The platform combines state-of-the-art techniques for extracting facts, opinions and sentiment from multimedia documents, and unlike earlier platforms, it exploits both visual and textual techniques to support multimedia information retrieval. Foreseeing the usefulness of this software in the wider community, the platform has been made generally available as an open-source project. This paper describes the platform design, gives an overview of the analysis algorithms integrated into the system and describes two applications that utilise the system for multimedia information retrieval.},
}

% Journal article on semi-automatic identification of construction candidates for a Swedish constructicon.
% The issue number is a plain integer; the year belongs only in the year field.
@article{forsberg-etal-2014-from-208123,
  author   = {Forsberg, Markus and Johansson, Richard and Bäckström, Linnéa and Borin, Lars and Lyngfelt, Benjamin and Olofsson, Joel and Prentice, Julia},
  title    = {From construction candidates to constructicon entries: An experiment using semi-automatic methods for identifying constructions in corpora},
  journal  = {Constructions and Frames},
  year     = {2014},
  volume   = {6},
  number   = {1},
  pages    = {114--135},
  abstract = {We present an experiment where natural language processing tools are used to automatically identify potential constructions in a corpus. The experiment was conducted as part of the ongoing efforts to develop a Swedish constructicon. Using an automatic method to suggest constructions has advantages not only for efficiency but also methodologically: it forces the analyst to look more objectively at the constructions actually occurring in corpora, as opposed to focusing on “interesting” constructions only. As a heuristic for identifying potential constructions, the method has proved successful, yielding about 200 (out of 1,200) highly relevant construction candidates.},
}

% Workshop paper on selecting sentences readable by second-language learners of Swedish.
% The page range uses the standard double hyphen; the percent sign in the abstract is
% escaped so the field stays safe if it is ever typeset by LaTeX.
@inproceedings{pilan-etal-2014-rule-210940,
  author    = {Pilán, Ildikó and Volodina, Elena and Johansson, Richard},
  title     = {Rule-based and machine learning approaches for second language sentence-level readability},
  booktitle = {Proceedings of the Ninth Workshop on Innovative Use of NLP for Building Educational Applications, June 26, 2014 Baltimore, Maryland, USA},
  year      = {2014},
  pages     = {174--184},
  isbn      = {978-1-941643-03-7},
  abstract  = {We present approaches for the identification of sentences understandable by second language learners of Swedish, which can be used in automatically generated exercises based on corpora. In this work we merged methods and knowledge from machine learning-based readability research, from rule-based studies of Good Dictionary Examples and from second language learning syllabuses. The proposed selection methods have also been implemented as a module in a free web-based language learning platform. Users can use different parameters and linguistic filters to personalize their sentence search with or without a machine learning component assessing readability. The sentences selected have already found practical use as multiple-choice exercise items within the same platform. Out of a number of deep linguistic indicators explored, we found mainly lexical-morphological and semantic features informative for second language sentence-level readability. We obtained a readability classification accuracy result of 71\%, which approaches the performance of other models used in similar tasks. Furthermore, during an empirical evaluation with teachers and students, about seven out of ten sentences selected were considered understandable, the rule-based approach slightly outperforming the method incorporating the machine learning model.},
}

% Conference paper on Swedish FrameNet++ (SLTC 2014).
% Proper nouns in the title are brace-protected so sentence-casing styles keep their capitals;
% the colon separates main title and subtitle.
@inproceedings{ahlberg-etal-2014-swedish-210083,
  author    = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
  title     = {{Swedish} {FrameNet++}: The Beginning of the End and the End of the Beginning},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13--14 November 2014},
  year      = {2014},
}

% Conference paper on Koala, an annotation infrastructure built around Korp (SLTC 2014).
% Proper nouns in the title are brace-protected against style case-folding;
% the colon separates main title and subtitle.
@inproceedings{adesam-etal-2014-koala-211376,
  author    = {Adesam, Yvonne and Borin, Lars and Bouma, Gerlof and Forsberg, Markus and Johansson, Richard},
  title     = {{Koala} – {Korp’s} Linguistic Annotations: Developing an infrastructure for text-based research with high-quality annotations},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13--14 November 2014},
  year      = {2014},
}

% SemEval-2014 system description paper for the RTRGO Twitter sentiment-analysis system.
% Acronyms in the title are brace-protected so sentence-casing styles do not lowercase them.
@inproceedings{gunther-etal-2014-rtrgo-201512,
  author    = {Günther, Tobias and Vancoppenolle, Jean and Johansson, Richard},
  title     = {{RTRGO}: Enhancing the {GU-MLT-LT} System for Sentiment Analysis of Short Messages},
  booktitle = {Proceedings of the 8th International Workshop on Semantic Evaluation (SemEval 2014) August 23--24, 2014 Dublin, Ireland},
  year      = {2014},
  pages     = {497--502},
  isbn      = {978-1-941643-24-2},
  abstract  = {This paper describes the enhancements made to our GU-MLT-LT system (Günther and Furrer, 2013) for the SemEval-2014 re-run of the SemEval-2013 shared task on sentiment analysis in Twitter. The changes include the usage of a Twitter-specific tokenizer, additional features and sentiment lexica, feature weighting and random subspace learning. The improvements result in an increase of 4.18 F-measure points on this year’s Twitter test set, ranking 3rd.},
}

% Journal article on automatically inducing new lexical units for the Swedish FrameNet.
% Proper nouns in the title are brace-protected so sentence-casing styles keep their capitals.
@article{johansson-2014-automatic-201874,
  author   = {Johansson, Richard},
  title    = {Automatic Expansion of the {Swedish} {FrameNet} Lexicon},
  journal  = {Constructions and Frames},
  year     = {2014},
  volume   = {6},
  number   = {1},
  pages    = {92--113},
  abstract = {We evaluate several lexicon-based and corpus-based methods to automatically induce new lexical units for the Swedish FrameNet, and we see that the best-performing setup uses a combination of both types of methods. A particular challenge for Swedish is the absence of a lexical resource such as WordNet; however, we show that the semantic network SALDO, which is organized according to lexicographical principles quite different from those of WordNet, is very useful for our purposes.},
}

% Swedish-language publication on culturomics in digital text archives.
% NOTE(review): the journal value may actually be an edited volume rather than a periodical;
% if so this should become an incollection entry with booktitle, publisher and pages. TODO confirm.
@article{borin-johansson-2014-kulturomik-192931,
  author  = {Borin, Lars and Johansson, Richard},
  title   = {Kulturomik: Att spana efter språkliga och kulturella förändringar i digitala textarkiv},
  journal = {Historia i en digital värld},
  year    = {2014},
}