Skip to main content

BibTeX

% Book record: David Alfter (2021), on NLP for single-word and multi-word
% lexical complexity from a second language learner perspective.
@book{alfter-2021-exploring-304548,
  author    = {Alfter, David},
  title     = {Exploring natural language processing for single-word and multi-word lexical complexity from a second language learner perspective},
  publisher = {Göteborgs universitet},
  year      = {2021},
  isbn      = {978-91-87850-79-0},
  abstract  = {In this thesis, we investigate how natural language processing (NLP) tools and techniques can be applied to vocabulary aimed at second language learners of Swedish in order to classify vocabulary items into different proficiency levels suitable for learners of different levels.  

In the first part, we use feature-engineering to represent words as vectors and feed these vectors into machine learning algorithms in order to (1) learn CEFR labels from the input data and (2) predict the CEFR level of unseen words.
Our experiments corroborate the finding that feature-based classification models using 'traditional' machine learning still outperform deep learning architectures in the task of deciding how complex a word is. 

In the second part, we use crowdsourcing as a technique to generate ranked lists of multi-word expressions using both experts and non-experts (i.e. language learners). Our experiment shows that non-expert and expert rankings are highly correlated, suggesting that non-expert intuition can be seen as on-par with expert knowledge, at least in the chosen experimental configuration.

The main practical output of this research comes in two forms: prototypes and resources. We have implemented various prototype applications for (1) the automatic prediction of words based on the feature-engineering machine learning method, (2) language learning applications using graded word lists, and (3) an annotation tool for the manual annotation of expressions across a variety of linguistic factors.},
}

% Whole-proceedings record for the NLP4CALL 2021 workshop.
% Uses the standard proceedings entry type (edited_book is not a type that
% BibTeX styles define, so the entry would otherwise be handled as an unknown
% type). The series title and its volume number are stored in series/volume
% instead of being packed into the publisher field.
@proceedings{alfter-etal-2021-proceedings-311727,
  editor    = {Alfter, David and Volodina, Elena and Pilán, Ildikó and Graën, Johannes and Borin, Lars},
  title     = {Proceedings of the 10th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2021)},
  series    = {Linköping Electronic Conference Proceedings},
  volume    = {177},
  address   = {Linköping, Sweden},
  year      = {2021},
  isbn      = {978-91-7929-625-4},
  abstract  = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language
Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural
Language Processing and Speech Technologies in CALL systems and exploring the theoretical and
methodological issues arising in this connection. The latter includes, among others, the integration of
insights from Second Language Acquisition (SLA) research, and the promotion of “Computational
SLA” through setting up Second Language research infrastructures.},
}

% Journal article: crowdsourced relative rankings of multi-word expressions,
% comparing experts with non-experts.
@article{alfter-etal-2021-crowdsourcing-311721,
  author    = {Alfter, David and Lindström Tiedemann, Therese  and Volodina, Elena},
  title     = {Crowdsourcing Relative Rankings of Multi-Word Expressions: Experts versus Non-Experts},
  journal   = {Northern European Journal of Language Technology (NEJLT)},
  volume    = {7},
  number    = {1},
  year      = {2021},
  abstract  = {In this study we investigate to which degree experts and non-experts agree on questions of difficulty in a crowdsourcing experiment. We ask non-experts (second language learners of Swedish) and two groups of experts (teachers of Swedish as a second/foreign language and CEFR experts) to rank multi-word expressions in a crowdsourcing experiment. We find that the resulting rankings by all the three tested groups correlate to a very high degree, which suggests that judgments produced in a comparative setting are not influenced by professional insights into Swedish as a second language.},
}