Skip to main content
Språkbanken Text is a part of Språkbanken.

BibTeX

@inProceedings{mogren-johansson-2017-character-256929,
	title        = {Character-based Recurrent Neural Networks for Morphological Relational Reasoning},
	abstract     = {We present a model for predicting word forms based on morphological relational reasoning with analogies. While previous work has explored tasks such as morphological inflection and reinflection, these models rely on an explicit enumeration of morphological features, which may not be available in all cases. To address the task of predicting a word form given a demo relation (a pair of word forms) and a query word, we devise a character-based recurrent neural network architecture using three separate encoders and a decoder. We also investigate a multiclass learning setup, where the prediction of the relation type label is used as an auxiliary task. Our results show that the exact form can be predicted for English with an accuracy of 94.7\%. For Swedish, which has a more complex morphology with more inflectional patterns for nouns and verbs, the accuracy is 89.3\%. We also show that using the auxiliary task of learning the relation type speeds up convergence and improves the prediction accuracy for the word generation task.},
	booktitle    = {Proceedings of the First Workshop on Subword and Character Level Models in NLP},
	author       = {Mogren, Olof and Johansson, Richard},
	year         = {2017},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA, United States},
}

@inProceedings{oepen-etal-2017-2017-264156,
	title        = {The 2017 Shared Task on Extrinsic Parser Evaluation. Towards a Reusable Community Infrastructure},
	abstract     = {The 2017 Shared Task on Extrinsic Parser Evaluation (EPE 2017) seeks to provide better estimates of the relative utility of different types of dependency representations for a variety of downstream applications that depend centrally on the analysis of grammatical structure. EPE 2017 defines a generalized notion of lexicalized syntactico-semantic dependency representations and provides a common interchange format to three state-of-the-art downstream applications, viz. biomedical event extraction, negation resolution, and fine-grained opinion analysis. As a first step towards building a generic and extensible infrastructure for extrinsic parser evaluation, the downstream applications have been generalized to support a broad range of diverse dependency representations (including divergent sentence and token boundaries) and to allow fully automated re-training and evaluation for a specific collection of parser outputs. Nine teams participated in EPE 2017, submitting 49 distinct runs that encompass many different families of dependency representations, distinct approaches to preprocessing and parsing, and various types and volumes of training data.},
	booktitle    = {Proceedings of the 2017 Shared Task on Extrinsic Parser Evaluation at the Fourth International Conference on Dependency Linguistics and the 15th International Conference on Parsing Technologies},
	author       = {Oepen, Stephan and Øvrelid, Lilja and Björne, Jari and Johansson, Richard and Lapponi, Emanuele and Ginter, Filip and Velldal, Erik},
	year         = {2017},
	publisher    = {Association for Computational Linguistics (ACL)},
	address      = {Stroudsburg, USA},
	ISBN         = {978-1-945626-74-6},
}

@inProceedings{johansson-2017-2017-264160,
	title        = {{EPE} 2017: The {Trento–Gothenburg} Opinion Extraction System},
	abstract     = {We give an overview of one of the three downstream systems in the Extrinsic Parser Evaluation shared task of 2017: the Trento–Gothenburg system for opinion extraction. We describe the modifications required to make the system agnostic to its input dependency representation, and discuss how the input affects the various submodules of the system. The results of the EPE shared task are presented and discussed, and to get a more detailed understanding of the effects of the dependencies we run two of the submodules separately. The results suggest that the module where the effects are strongest is the opinion holder extraction module, which can be explained by the fact that this module uses several dependency-based features. For the other modules, the effects are hard to measure.},
	booktitle    = {Proceedings of the 2017 Shared Task on Extrinsic Parser Evaluation at the Fourth International Conference on Dependency Linguistics and the 15th International Conference on Parsing Technologies},
	author       = {Johansson, Richard},
	year         = {2017},
	publisher    = {Association for Computational Linguistics (ACL)},
	address      = {Stroudsburg, USA},
	ISBN         = {978-1-945626-74-6},
}

@inProceedings{nietopina-johansson-2017-training-261938,
	title        = {Training Word Sense Embeddings With Lexicon-based Regularization},
	abstract     = {We propose to improve word sense embeddings by enriching an automatic corpus-based method with lexicographic data. Information from a lexicon is introduced into the learning algorithm’s objective function through a regularizer. The incorporation of lexicographic data yields embeddings that are able to reflect expert-defined word senses, while retaining the robustness, high quality, and coverage of automatic corpus-based methods. These properties are observed in a manual inspection of the semantic clusters that different degrees of regularizer strength create in the vector space. Moreover, we evaluate the sense embeddings in two downstream applications: word sense disambiguation and semantic frame prediction, where they outperform simpler approaches. Our results show that a corpus-based model balanced with lexicographic data learns better representations and improve their performance in downstream tasks.},
	booktitle    = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers), Taipei, Taiwan, November 27 – December 1, 2017},
	author       = {Nieto Piña, Luis and Johansson, Richard},
	year         = {2017},
	publisher    = {Asian Federation of Natural Language Processing},
	ISBN         = {978-1-948087-00-1},
}

@inProceedings{adouane-etal-2017-romanized-252493,
	title        = {Romanized {Arabic} and {Berber} Detection Using {PPM} and Dictionary Methods},
	abstract     = {Arabic is one of the Semitic languages written in Arabic script in its standard form. However, the recent rise of social media and new technologies has contributed considerably to the emergence of a new form of Arabic, namely Arabic written in Latin scripts, often called Romanized Arabic or Arabizi. While Romanized Arabic is an informal language, Berber or Tamazight uses Latin script in its standard form with some orthography differences depending on the country it is used in. Both these languages are under-resourced and unknown to the state-of-the-art language identifiers. In this paper, we present a language automatic identifier for both Romanized Arabic and Romanized Berber. We also describe the built linguistic resources (large dataset and lexicons) including a wide range of Arabic dialects (Algerian, Egyptian, Gulf, Iraqi, Levantine, Moroccan and Tunisian dialects) as well as the most popular Berber varieties (Kabyle, Tashelhit, Tarifit, Tachawit and Tamzabit). We use the Prediction by Partial Matching (PPM) and dictionary-based methods. The methods reach a macro-average F-Measure of 98.74\% and 97.60\% respectively.},
	booktitle    = {13th ACS/IEEE International Conference on Computer Systems and Applications AICCSA 2016},
	author       = {Adouane, Wafia and Semmar, Nasredine and Johansson, Richard},
	year         = {2017},
	address      = {Morocco},
	ISBN         = {978-150904320-0},
}