% Bibliography database: five conference papers from 2017, each with
% Richard Johansson as author or co-author.
%
% Conventions used in this file:
%   * The file is UTF-8 encoded; names and dashes are stored as literal
%     Unicode characters, not LaTeX escapes.
%   * Citation keys are kept exactly as exported, since documents cite them.
%   * Recurring publisher and proceedings names are defined once as string
%     macros below, so each has a single spelling.
%   * In titles, acronyms and proper nouns are wrapped in braces so that
%     sentence-casing bibliography styles do not lowercase them.
%   * Percent signs in abstracts are backslash-escaped, because a bare
%     percent sign starts a LaTeX comment if a style prints the abstract.
%   * Abstracts have been cleaned of PDF-extraction artefacts (line-break
%     hyphens and split "fi" ligatures); wording is otherwise as exported.

@string{aclpub = {Association for Computational Linguistics}}

@string{epe2017proc = {Proceedings of the 2017 Shared Task on Extrinsic Parser Evaluation at the Fourth International Conference on Dependency Linguistics and the 15th International Conference on Parsing Technologies}}

@inproceedings{mogren-johansson-2017-character-256929,
  author    = {Mogren, Olof and Johansson, Richard},
  title     = {Character-based Recurrent Neural Networks for Morphological Relational Reasoning},
  booktitle = {Proceedings of the First Workshop on Subword and Character Level Models in NLP},
  year      = {2017},
  publisher = aclpub,
  address   = {Stroudsburg, PA, USA},
  abstract  = {We present a model for predicting word forms based on morphological relational reasoning with analogies. While previous work has explored tasks such as morphological inflection and reinflection, these models rely on an explicit enumeration of morphological features, which may not be available in all cases. To address the task of predicting a word form given a demo relation (a pair of word forms) and a query word, we devise a character-based recurrent neural network architecture using three separate encoders and a decoder. We also investigate a multiclass learning setup, where the prediction of the relation type label is used as an auxiliary task. Our results show that the exact form can be predicted for English with an accuracy of 94.7\%. For Swedish, which has a more complex morphology with more inflectional patterns for nouns and verbs, the accuracy is 89.3\%. We also show that using the auxiliary task of learning the relation type speeds up convergence and improves the prediction accuracy for the word generation task.},
}

@inproceedings{oepen-etal-2017-2017-264156,
  author    = {Oepen, Stephan and Øvrelid, Lilja and Björne, Jari and Johansson, Richard and Lapponi, Emanuele and Ginter, Filip and Velldal, Erik},
  title     = {The 2017 Shared Task on Extrinsic Parser Evaluation. {Towards} a Reusable Community Infrastructure},
  booktitle = epe2017proc,
  year      = {2017},
  publisher = aclpub,
  address   = {Stroudsburg, PA, USA},
  isbn      = {978-1-945626-74-6},
  abstract  = {The 2017 Shared Task on Extrinsic Parser Evaluation (EPE 2017) seeks to provide better estimates of the relative utility of different types of dependency representations for a variety of downstream applications that depend centrally on the analysis of grammatical structure. EPE 2017 defines a generalized notion of lexicalized syntactico-semantic dependency representations and provides a common interchange format to three state-of-the-art downstream applications, viz. biomedical event extraction, negation resolution, and fine-grained opinion analysis. As a first step towards building a generic and extensible infrastructure for extrinsic parser evaluation, the downstream applications have been generalized to support a broad range of diverse dependency representations (including divergent sentence and token boundaries) and to allow fully automated re-training and evaluation for a specific collection of parser outputs. Nine teams participated in EPE 2017, submitting 49 distinct runs that encompass many different families of dependency representations, distinct approaches to preprocessing and parsing, and various types and volumes of training data.},
}

@inproceedings{johansson-2017-2017-264160,
  author    = {Johansson, Richard},
  title     = {{EPE} 2017: The {Trento–Gothenburg} Opinion Extraction System},
  booktitle = epe2017proc,
  year      = {2017},
  publisher = aclpub,
  address   = {Stroudsburg, PA, USA},
  isbn      = {978-1-945626-74-6},
  abstract  = {We give an overview of one of the three downstream systems in the Extrinsic Parser Evaluation shared task of 2017: the Trento–Gothenburg system for opinion extraction. We describe the modifications required to make the system agnostic to its input dependency representation, and discuss how the input affects the various submodules of the system. The results of the EPE shared task are presented and discussed, and to get a more detailed understanding of the effects of the dependencies we run two of the submodules separately. The results suggest that the module where the effects are strongest is the opinion holder extraction module, which can be explained by the fact that this module uses several dependency-based features. For the other modules, the effects are hard to measure.},
}

@inproceedings{nietopina-johansson-2017-training-261938,
  author    = {Nieto Piña, Luis and Johansson, Richard},
  title     = {Training Word Sense Embeddings With Lexicon-based Regularization},
  booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Taipei, Taiwan, November 27 – December 1, 2017},
  year      = {2017},
  publisher = {Asian Federation of Natural Language Processing},
  isbn      = {978-1-948087-00-1},
  abstract  = {We propose to improve word sense embeddings by enriching an automatic corpus-based method with lexicographic data. Information from a lexicon is introduced into the learning algorithm’s objective function through a regularizer. The incorporation of lexicographic data yields embeddings that are able to reflect expert-defined word senses, while retaining the robustness, high quality, and coverage of automatic corpus-based methods. These properties are observed in a manual inspection of the semantic clusters that different degrees of regularizer strength create in the vector space. Moreover, we evaluate the sense embeddings in two downstream applications: word sense disambiguation and semantic frame prediction, where they outperform simpler approaches. Our results show that a corpus-based model balanced with lexicographic data learns better representations and improve their performance in downstream tasks.},
}

% NOTE(review): in the entry below the booktitle names AICCSA 2016 while
% year is 2017 -- presumably the proceedings appeared the year after the
% conference; confirm. The address field holds only a country, which looks
% like the conference location rather than a publisher city, and there is
% no publisher field -- TODO confirm and complete from the proceedings.
@inproceedings{adouane-etal-2017-romanized-252493,
  author    = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard},
  title     = {Romanized {Arabic} and {Berber} Detection Using {PPM} and Dictionary Methods},
  booktitle = {13th ACS/IEEE International Conference on Computer Systems and Applications AICCSA 2016},
  year      = {2017},
  address   = {Morocco},
  isbn      = {978-1-5090-4320-0},
  abstract  = {Arabic is one of the Semitic languages written in Arabic script in its standard form. However, the recent rise of social media and new technologies has contributed considerably to the emergence of a new form of Arabic, namely Arabic written in Latin scripts, often called Romanized Arabic or Arabizi. While Romanized Arabic is an informal language, Berber or Tamazight uses Latin script in its standard form with some orthography differences depending on the country it is used in. Both these languages are under-resourced and unknown to the state-of-the-art language identifiers. In this paper, we present a language automatic identifier for both Romanized Arabic and Romanized Berber. We also describe the built linguistic resources (large dataset and lexicons) including a wide range of Arabic dialects (Algerian, Egyptian, Gulf, Iraqi, Levantine, Moroccan and Tunisian dialects) as well as the most popular Berber varieties (Kabyle, Tashelhit, Tarifit, Tachawit and Tamzabit). We use the Prediction by Partial Matching (PPM) and dictionary-based methods. The methods reach a macro-average F-Measure of 98.74\% and 97.60\% respectively.},
}