@comment{BibTeX publication list (exported from a web page; navigation text removed).}

@inproceedings{lange-ljunglof-2020-learning-291243,
	title        = {Learning Domain-specific Grammars from a Small Number of Examples},
	abstract     = {In this paper we investigate the problem of grammar inference from a different perspective. The common approach is to try to infer a grammar directly from example sentences, which either requires a large training set or suffers from bad accuracy. We instead view it as a problem of grammar restriction or sub-grammar extraction. We start from a large-scale resource grammar and a small number of examples, and find a sub-grammar that still covers all the examples. To do this we formulate the problem as a constraint satisfaction problem, and use an existing constraint solver to find the optimal grammar. We have made experiments with English, Finnish, German, Swedish and Spanish, which show that 10–20 examples are often sufficient to learn an interesting domain grammar. Possible applications include computer-assisted language learning, domain-specific dialogue systems, computer games, Q/A-systems, and others.},
	booktitle    = {12th International Conference on Agents and Artificial Intelligence - Volume 1: {NLPinAI}},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2020},
	publisher    = {SciTePress},
	isbn         = {978-989-758-395-7},
}

@inproceedings{lange-ljunglof-2018-demonstrating-274016,
	title        = {Demonstrating the {MUSTE} Language Learning Environment},
	abstract     = {We present a language learning application that relies on grammars to model the learning outcome. Based on this concept we can provide a powerful framework for language learning exercises with an intuitive user interface and a high reliability. Currently the application aims to augment existing language classes and support students by improving the learner attitude and the general learning outcome. Extensions beyond that scope are promising and likely to be added in the future.},
	booktitle    = {{NLP4CALL} 2018, the 7th Workshop on NLP for Computer Assisted Language Learning, Stockholm, 7th November 2018; published as issue 152 of Linköping Electronic Conference Proceedings},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2018},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	isbn         = {978-91-7685-173-9},
}

@inproceedings{lange-ljunglof-2018-mulle-274014,
	title        = {{MULLE}: A grammar-based {Latin} language learning tool to supplement the classroom setting},
	abstract     = {MULLE is a tool for language learning that focuses on teaching Latin as a foreign language. It is aimed for easy integration into the traditional classroom setting and syllabus, which makes it distinct from other language learning tools that provide standalone learning experience. It uses grammar-based lessons and embraces methods of gamification to improve the learner motivation. The main type of exercise provided by our application is to practice translation, but it is also possible to shift the focus to vocabulary or morphology training.},
	booktitle    = {{NLPTEA} 2018, the 5th Workshop on Natural Language Processing Techniques for Educational Applications, Melbourne, Australia, 19th July 2018},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	address      = {Melbourne, Australia},
}

@inproceedings{lange-ljunglof-2018-putting-274013,
	title        = {Putting Control into Language Learning},
	abstract     = {Controlled Natural Languages (CNLs) have many applications including document authoring, automatic reasoning on texts and reliable machine translation, but their application is not limited to these areas. We explore a new application area of CNLs, the use of CNLs in computer-assisted language learning. In this paper we present a web application for language learning using CNLs as well as a detailed description of the properties of the family of CNLs it uses.},
	booktitle    = {{CNL} 2018, the 6th International Workshop on Controlled Natural Language, Maynooth, Co Kildare, 27-28th August 2018; published as volume 304 of Frontiers in Artificial Intelligence and Applications},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2018},
	publisher    = {IOS Press},
	address      = {Amsterdam},
	isbn         = {978-1-61499-904-1},
}

@proceedings{alfter-etal-2019-proceedings-285613,
	title        = {Proceedings of the 8th Workshop on Natural Language Processing for Computer Assisted Language Learning ({NLP4CALL} 2019), September 30, Turku, Finland},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promote development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other.

The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools.

The NLP4CALL workshop series is aimed at bringing together competences from these areas for sharing experiences and brainstorming around the future of the field.},
	editor       = {Alfter, David and Volodina, Elena and Borin, Lars and Pilán, Ildikó and Lange, Herbert},
	year         = {2019},
	publisher    = {Linköping University Electronic Press, Linköpings universitet},
	address      = {Linköping},
	isbn         = {978-91-7929-998-9},
}

@proceedings{alfter-etal-2020-proceedings-300071,
	title        = {Proceedings of the 9th Workshop on Natural Language Processing for Computer Assisted Language Learning 2020},
	abstract     = {The workshop series on Natural Language Processing (NLP) for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, the integration of insights from Second Language Acquisition (SLA) research, and the promotion of “Computational SLA” through setting up Second Language research infrastructures.
This collection presents four selected papers describing use of Language Technology for language learning.},
	editor       = {Alfter, David and Volodina, Elena and Pilán, Ildikó and Lange, Herbert and Borin, Lars},
	year         = {2020},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
	isbn         = {978-91-7929-732-9},
}

@incollection{lange-ljunglof-2021-learning-305146,
	title        = {Learning Domain-Specific Grammars from a Small Number of Examples},
	abstract     = {In this chapter we investigate the problem of grammar learning from a perspective that diverges from previous approaches. These prevailing approaches to learning grammars usually attempt to infer a grammar directly from example corpora without any additional information. This either requires a large training set or suffers from bad accuracy. We instead view learning grammars as a problem of grammar restriction or subgrammar extraction. We start from a large-scale grammar (called a resource grammar) and a small number of example sentences, and find a subgrammar that still covers all the examples. To accomplish this, we formulate the problem as a constraint satisfaction problem, and use a constraint solver to find the optimal grammar. We created experiments with English, Finnish, German, Swedish, and Spanish, which show that 10–20 examples are often sufficient to learn an interesting grammar for a specific application. We also present two extensions to this basic method: we include negative examples and allow rules to be merged. The resulting grammars can more precisely cover specific linguistic phenomena. Our method, together with the extensions, can be used to provide a grammar learning system for specific applications. This system is easy-to-use, human-centric, and can be used by non-syntacticians. Based on this grammar learning method, we can build applications for computer-assisted language learning and interlingual communication, which rely heavily on the knowledge of language and domain experts who often lack the competence to develop required grammars themselves.},
	booktitle    = {Natural Language Processing in Artificial Intelligence—{NLPinAI} 2020},
	author       = {Lange, Herbert and Ljunglöf, Peter},
	year         = {2021},
	publisher    = {Springer International Publishing},
	isbn         = {978-3-030-63787-3},
}

@inproceedings{lange-2022-metadata-329234,
	title        = {Metadata Formats for Learner Corpora: Case Study and Discussion},
	booktitle    = {Proceedings of the 11th Workshop on Natural Language Processing for Computer-Assisted Language Learning ({NLP4CALL} 2022)},
	author       = {Lange, Herbert},
	year         = {2022},
	publisher    = {Linköping University Electronic Press},
	address      = {Linköping},
}

@inproceedings{lange-aznar-2022-refco-329235,
	title        = {{RefCo} and its Checker: Improving Language Documentation Corpora’s Reusability Through a Semi-Automatic Review Process},
	booktitle    = {Proceedings of the Thirteenth Language Resources and Evaluation Conference},
	author       = {Lange, Herbert and Aznar, Jocelyn},
	year         = {2022},
	publisher    = {European Language Resources Association},
	address      = {Marseille, France},
}

@book{lange-2020-learning-295656,
	title        = {Learning Language (with) Grammars: From Teaching {Latin} to Learning Domain-Specific Grammars},
	abstract     = {This thesis describes work in three areas: grammar engineering, computer-assisted language learning and grammar learning. These three parts are connected by the concept of a grammar-based language learning application. Two types of grammars are of concern. The first we call resource grammars, extensive descriptions of natural languages. Part I focuses on this kind of grammars. The other are domain-specific or application-specific grammars. These grammars only describe a fragment of natural language that is determined by the domain of a certain application. Domain-specific grammars are relevant for Part II and Part III. Another important distinction is between humans learning a new natural language using computational grammars (Part II) and computers learning grammars from example sentences (Part III). Part I of this thesis focuses on grammar engineering and grammar testing. It describes the development and evaluation of a computational resource grammar for Latin. Latin is known for its rich morphology and free word order, both have to be handled in a computationally efficient way. A special focus is on methods how computational grammars can be evaluated using corpus data. Such an evaluation is presented for the Latin resource grammar. Part II, the central part, describes a computer-assisted language learning application based on domain-specific grammars. The language learning application demonstrates how computational grammars can be used to guide the user input and how language learning exercises can be modeled as grammars. This allows us to put computational grammars in the center of the design of language learning exercises used to help humans learn new languages. Part III, the final part, is dedicated to a method to learn domain- or application-specific grammars based on a wide-coverage grammar and small sets of example sentences.
Here a computer is learning a grammar for a fragment of a natural language from example sentences, potentially without any additional human intervention. These learned grammars can be based e.g. on the Latin resource grammar described in Part II and used as domain-specific lesson grammars in the language learning application described Part II.},
	author       = {Lange, Herbert},
	year         = {2020},
	publisher    = {University of Gothenburg},
	address      = {Gothenburg},
	isbn         = {978-91-7833-987-7},
	internal-note = {NOTE(review): abstract opens with "This thesis" — presumably a doctoral thesis; consider @phdthesis with school={University of Gothenburg}. Verify before changing entry type.},
}

@book{lange-2018-computer-269655,
	title        = {Computer-Assisted Language Learning with Grammars. A Case Study on {Latin} Learning},
	abstract     = {Learning new languages has a high relevance in today’s society with a globalized economy and the freedom to move abroad for work, study or other reasons. In this context new methods to teach and learn languages with the help of modern technology are becoming more relevant besides traditional language classes. This work presents a new approach to combine a traditional language class with a modern computer-based approach for teaching. As a concrete example a web application to help teach and learn Latin was developed.},
	author       = {Lange, Herbert},
	year         = {2018},
	address      = {Gothenburg, Sweden},
	internal-note = {NOTE(review): @book is missing required field publisher; address suggests University of Gothenburg, and this may be a thesis — confirm and fill in.},
}

@inproceedings{lange-2017-implementation-266386,
	title        = {Implementation of a {Latin} grammar in {Grammatical Framework}},
	abstract     = {In this paper we present work in developing a computerized grammar for the Latin language. It demonstrates the principles and challenges in developing a grammar for a natural language in a modern grammar formalism. The grammar presented here provides a useful resource for natural language processing applications in different fields. It can be easily adopted for language learning and use in language technology for Cultural Heritage like translation applications or to support post-correction of document digitization.},
	booktitle    = {ACM International Conference Proceeding Series},
	author       = {Lange, Herbert},
	year         = {2017},
}

@article{wamprechtshammer-etal-2022-quest-334932,
	title        = {{QUEST}: Guidelines and Specifications for the Assessment of Audiovisual, Annotated Language Data},
	abstract     = {This guide documents the main results of the joint project “QUEST: Quality – Established: Qualitätsstandards und Kurationskriterien für audiovisuelle annotierte Sprachdaten”, which was carried out between 2019 and 2022 and funded by the German Federal Ministry of Education and Research (BMBF). The project consortium consisted of the University of Hamburg, the Leibniz-Centre General Linguistics (ZAS) in Berlin, the Archive for Spoken German (AGD)/Institute for the German Language (IDS) in Mannheim and the University of Cologne. The BBAW in Berlin was also involved through the ‘Endangered Languages Documentation Programme’.

Main aim of the project was to maximise the potential for reuse and secondary use of audiovisual, annotated language data. For this purpose, QUEST developed quality standards and curation criteria for several reuse scenarios such as ‘Language Documentation’, ‘Learner Corpora’, ‘Interpreted Corpora’, ‘Sign Language’, ‘Language Community’, ‘Ethnography’ and ‘Oral History’. Based on this, quality assurance procedures (an online questionnaire and automated quality checks) were implemented and tested on authentic data.

In summary, the guidelines document provides definitions and examples for the quality criteria elaborated in QUEST, which are intended to provide information on the reuse potential of audiovisual, annotated data and aims to give overview of the objects and workflows of the evaluation system. Quality standards and curation criteria are linked to data maturity levels and suggestions are made on how to evaluate each criterion.},
	journal      = {Working Papers in Corpus Linguistics and Digital Technologies: Analyses and Methodology},
	author       = {Wamprechtshammer, Anna and Arestau, Elena and Aznar, Jocelyn and Hedeland, Hanna and Isard, Amy and Khait, Ilya and Lange, Herbert and Majka, Nicole and Rau, Felix},
	year         = {2022},
	volume       = {8},
}