Skip to main content
Språkbanken Text is a part of Språkbanken.

BibTeX

@inproceedings{qwaider(abukwaik)-etal-2020-arabic-291768,
	title        = {An {Arabic} Tweets Sentiment Analysis Dataset ({ATSAD}) using Distant Supervision and Self Training},
	abstract     = {As the number of social media users increases, they express their thoughts, needs, socialise and publish their opinions. For good social media sentiment analysis, good quality resources are needed, and the lack of these resources is particularly evident for languages other than English, in particular Arabic. The available Arabic resources lack of from either the size of the corpus or the quality of the annotation. In this paper, we present an Arabic Sentiment Analysis Corpus collected from Twitter, which contains 36K tweets labelled into positive and negative. We employed distant supervision and self-training approaches into the corpus to annotate it. Besides, we release an 8K tweets manually annotated as a gold standard. We evaluated the corpus intrinsically by comparing it to human classification and pre-trained sentiment analysis models. Moreover, we apply extrinsic evaluation methods exploiting sentiment analysis task and achieve an accuracy of 86\%.},
	booktitle    = {Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools with a Shared Task on Offensive Language Detection (OSACT4-2020) at Language Resources and Evaluation Conference (LREC 2020), Marseille, 11--16 May 2020},
	author       = {Qwaider (abu kwaik), Chatrine (kathrein) and Chatzikyriakidis, Stergios and Dobnik, Simon and Johansson, Richard and Saad, Motaz},
	editor       = {Al-Khalifa, Hend and Magdy, Walid and Darwish, Kareem and Elsayed, Tamer and Mubarak, Hamdy},
	year         = {2020},
	publisher    = {European Language Resources Association (ELRA)},
	address      = {Marseille, France},
	isbn         = {979-10-95546-51-1},
}

@inproceedings{johansson-adesam-2020-training-293365,
	title        = {Training a {Swedish} Constituency Parser on Six Incompatible Treebanks},
	abstract     = {We investigate a transition-based parser that uses Eukalyptus, a function-tagged constituent treebank for Swedish which includes discontinuous constituents. In addition, we show that the accuracy of this parser can be improved by using a multitask learning architecture that makes it possible to train the parser on additional treebanks that use other annotation models.},
	booktitle    = {Proceedings of the 12th International Conference on Language Resources and Evaluation (LREC 2020)},
	author       = {Johansson, Richard and Adesam, Yvonne},
	year         = {2020},
	publisher    = {European Language Resources Association (ELRA)},
}