% Conference and workshop papers, all dated 2012.
% Conventions used below:
%   - one field per line, in the order author, title, booktitle, year,
%     number, pages, isbn, note, abstract;
%   - words that must keep their capitalisation in titles are brace-protected;
%   - LaTeX special characters inside abstracts are escaped, and accented
%     letters use the brace-wrapped escape form so classic BibTeX can sort them;
%   - publication status (e.g. Accepted) is stored in the note field, since
%     volume is reserved for an actual volume designation.

@inproceedings{Johansson-Richard2012-156400,
  author    = {Johansson, Richard and Friberg Heppin, Karin and Kokkinakis, Dimitrios},
  title     = {Semantic Role Labeling with the {Swedish} {FrameNet}},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12); Istanbul, Turkey; May 23--25},
  year      = {2012},
  pages     = {3697--3700},
  isbn      = {978-2-9517408-7-7},
  abstract  = {We present the first results on semantic role labeling using the Swedish FrameNet, which is a lexical resource currently in development. Several aspects of the task are investigated, including the selection of machine learning features, the effect of choice of syntactic parser, and the ability of the system to generalize to new frames and new genres. In addition, we evaluate two methods to make the role label classifier more robust: cross-frame generalization and cluster-based features. Although the small amount of training data limits the performance achievable at the moment, we reach promising results. In particular, the classifier that extracts the boundaries of arguments works well for new frames, which suggests that it already at this stage can be useful in a semi-automatic setting.},
}

@inproceedings{Ghosh-Sucheta2012-156399,
  author    = {Ghosh, Sucheta and Johansson, Richard and Riccardi, Giuseppe and Tonelli, Sara},
  title     = {Improving the Recall of a Discourse Parser by Constraint-based Postprocessing},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC}'12); Istanbul, Turkey; May 23--25},
  year      = {2012},
  pages     = {2791--2794},
  isbn      = {978-2-9517408-7-7},
  abstract  = {We describe two constraint-based methods that can be used to improve the recall of a shallow discourse parser based on conditional random field chunking. These methods use a set of natural structural constraints as well as others that follow from the annotation guidelines of the Penn Discourse Treebank. We evaluated the resulting systems on the standard test set of the PDTB and achieved a rebalancing of precision and recall with improved F-measures across the board. This was especially notable when we used evaluation metrics taking partial matches into account; for these measures, we achieved F-measure improvements of several points.},
}

@inproceedings{Johansson-Richard2012-156993,
  author    = {Johansson, Richard},
  title     = {Non-atomic Classification to Improve a Semantic Role Labeler for a Low-resource Language},
  booktitle = {Proceedings of the First Joint Conference on Lexical and Computational Semantics ({*SEM}); June 7--8; Montr{\'e}al, Canada},
  year      = {2012},
  pages     = {95--99},
  abstract  = {Semantic role classification accuracy for most languages other than English is constrained by the small amount of annotated data. In this paper, we demonstrate how the frame-to-frame relations described in the FrameNet ontology can be used to improve the performance of a FrameNet-based semantic role classifier for Swedish, a low-resource language. In order to make use of the FrameNet relations, we cast the semantic role classification task as a non-atomic label prediction task. The experiments show that the cross-frame generalization methods lead to a 27\% reduction in the number of errors made by the classifier. For previously unseen frames, the reduction is even more significant: 50\%.},
}

@inproceedings{Moschitti-Alessandro2012-156401,
  author    = {Moschitti, Alessandro and Ju, Qi and Johansson, Richard},
  title     = {Modeling Topic Dependencies in Hierarchical Text Categorization},
  booktitle = {Proceedings of the 50th Annual Meeting of the Association for Computational Linguistics ({ACL} 2012); Jeju, Korea; July 8--14},
  year      = {2012},
  pages     = {759--767},
  abstract  = {In this paper, we encode topic dependencies in hierarchical multi-label Text Categorization (TC) by means of rerankers. We represent reranking hypotheses with several innovative kernels considering both the structure of the hierarchy and the probability of nodes. Additionally, to better investigate the role of category relationships, we consider two interesting cases: (i) traditional schemes in which node-fathers include all the documents of their child-categories; and (ii) more general schemes, in which children can include documents not belonging to their fathers. The extensive experimentation on Reuters Corpus Volume 1 shows that our rerankers inject effective structural semantic dependencies in multi-classifiers and significantly outperform the state of the art.},
}

@inproceedings{Borin-Lars2012-157213,
  author    = {Borin, Lars and Forsberg, Markus and Johansson, Richard and Muhonen, Kristiina and Purtonen, Tanja and Voionmaa, Kaarlo},
  title     = {Transferring Frames: Utilization of Linked Lexical Resources},
  booktitle = {Proceedings of the Workshop on Inducing Linguistic Structure Submission ({WILS})},
  year      = {2012},
  pages     = {8--15},
  abstract  = {In our experiment, we evaluate the transferability of frames from Swedish to Finnish in parallel corpora. We evaluate both the theoretical possibility of transferring frames and the possibility of performing it using available lexical resources. We add the frame information to an extract of the Swedish side of the Kotus and JRC-Acquis corpora using an automatic frame labeler and copy it to the Finnish side. We focus on evaluating the results to get an estimation on how often the parallel sentences can be said to express the same frame. This sheds light to the questions: Are the same situations in the two languages expressed using different frames, i.e. are the frames transferable even in theory? How well can the frame information of running text be transferred from language to another?},
}

@inproceedings{Borin-Lars2012-157338,
  author    = {Borin, Lars and Forsberg, Markus and Friberg Heppin, Karin and Johansson, Richard and Kjellandsson, Annika},
  title     = {Search Result Diversification Methods to Assist Lexicographers},
  booktitle = {Proceedings of the 6th Linguistic Annotation Workshop},
  year      = {2012},
  pages     = {113--117},
  abstract  = {We show how the lexicographic task of finding informative and diverse example sentences can be cast as a search result diversification problem, where an objective based on relevance and diversity is maximized. This problem has been studied intensively in the information retrieval community during recent years, and efficient algorithms have been devised. We finally show how the approach has been implemented in a lexicographic project, and describe the relevance and diversity functions used in that context.},
}

@inproceedings{Ghosh-Sucheta2012-157440,
  author    = {Ghosh, Sucheta and Riccardi, Giuseppe and Johansson, Richard},
  title     = {Global Features for Shallow Discourse Parsing},
  booktitle = {Proceedings of the 13th Annual Meeting of the Special Interest Group on Discourse and Dialogue ({SIGDIAL})},
  year      = {2012},
  pages     = {150--159},
  abstract  = {A coherently related group of sentences may be referred to as a discourse. In this paper we address the problem of parsing coherence relations as defined in the Penn Discourse Tree Bank (PDTB). A good model for discourse structure analysis needs to account both for local dependencies at the token-level and for global dependencies and statistics. We present techniques on using inter-sentential or sentence-level (global), data-driven, non-grammatical features in the task of parsing discourse. The parser model follows up previous approach based on using token-level (local) features with conditional random fields for shallow discourse parsing, which is lacking in structural knowledge of discourse. The parser adopts a two-stage approach where first the local constraints are applied and then global constraints are used on a reduced weighted search space (n-best). In the latter stage we experiment with different rerankers trained on the first stage n-best parses, which are generated using lexico-syntactic local features. The two-stage parser yields significant improvements over the best performing model of discourse parser on the PDTB corpus.},
}

% Status "Accepted" is kept in note; no volume designation is known for this entry.
@inproceedings{Bennaceur-Amel2012-160393,
  author    = {Bennaceur, Amel and Howar, Falk and Issarny, Val{\'e}rie and Johansson, Richard and Moschitti, Alessandro and Spalazzese, Romina and Steffen, Bernhard and Sykes, Daniel},
  title     = {Machine Learning for Emergent Middleware},
  booktitle = {Proceedings of the Joint Workshop on Intelligent Methods for Software System Engineering ({JIMSE})},
  year      = {2012},
  note      = {Accepted},
  abstract  = {Highly dynamic and heterogeneous distributed systems are challenging today's middleware technologies. Existing middleware paradigms are unable to deliver on their most central promise, which is offering interoperability. In this paper, we argue for the need to dynamically synthesise distributed system infrastructures according to the current operating environment, thereby generating ``Emergent Middleware'' to mediate interactions among heterogeneous networked systems that interact in an ad hoc way. The paper outlines the overall architecture of Enablers underlying Emergent Middleware, and in particular focuses on the key role of learning in supporting such a process, spanning statistical learning to infer the semantics of networked system functions and automata learning to extract the related behaviours of networked systems.},
}

% Status "Accepted" is kept in note; no volume designation is known for this entry.
@inproceedings{Johansson-Richard2012-163602,
  author    = {Johansson, Richard},
  title     = {Bridging the Gap between Two Different {Swedish} Treebanks},
  booktitle = {Proceedings of the Fourth Swedish Language Technology Conference ({SLTC})},
  year      = {2012},
  note      = {Accepted},
  abstract  = {We present two simple adaptation methods to train a dependency parser in the situation when there are multiple treebanks available, and these treebanks are annotated according to different linguistic conventions. To test the methods, we train parsers on the Talbanken and Syntag treebanks of Swedish. The results show that the methods are effective for low-to-medium training set sizes.},
}

@inproceedings{Volodina-Elena2012-165961,
  author    = {Volodina, Elena and Johansson, Richard and Johansson Kokkinakis, Sofie},
  title     = {Semi-automatic selection of best corpus examples for {Swedish}: Initial algorithm evaluation},
  booktitle = {Proceedings of the {SLTC} 2012 workshop on {NLP} for {CALL}, Lund, 25th October, 2012},
  year      = {2012},
  number    = {080},
  pages     = {59--70},
  abstract  = {The study presented here describes the results of the initial evaluation of two sorting approaches to automatic ranking of corpus examples for Swedish. Representatives from two potential target user groups have been asked to rate top three hits per approach for sixty search items from the point of view of the needs of their professional target groups, namely second/foreign language (L2) teachers and lexicographers. This evaluation has shown, on the one hand, which of the two approaches to example rating (called in the text below algorithms \#1 and \#2) performs better in terms of finding better examples for each target user group; and on the other hand, which features evaluators associate with good examples. It has also facilitated statistic analysis of the ``good'' versus ``bad'' examples with reference to the measurable features, such as sentence length, word length, lexical frequency profiles, PoS constitution, dependency structure, etc. with a potential to find out new reliable classifiers.},
}