@inProceedings{stemle-etal-2019-working-319453, title = {Working together towards an ideal infrastructure for language learner corpora}, abstract = {In this article we provide an overview of first-hand experiences and vantage points for best practices from projects in seven European countries dedicated to learner corpus research (LCR) and the creation of language learner corpora. The corpora and tools involved in LCR are becoming more and more important, as are careful preparation and easy retrieval and reusability of corpora and tools. But the lack of commonly agreed solutions for many aspects of LCR, for interoperability between learner corpora and for the exchange of data from different learner corpus projects remains a challenge. We show how concepts like metadata, anonymization, error taxonomies and linguistic annotations, as well as tools, toolchains and data formats, can be individually challenging and how these challenges can be solved.}, booktitle = {Widening the Scope of Learner Corpus Research. Selected papers from the fourth Learner Corpus Research Conference. Corpora and Language in Use – Proceedings 5 / Andrea Abel, Aivars Glaznieks, Verena Lyding and Lionel Nicolas (eds.)}, author = {Stemle, Egon and Boyd, Adriane and Janssen, Maarten and Preradović, Nives Mikelić and Rosen, Alexandr and Rosén, Dan and Volodina, Elena}, year = {2019}, publisher = {PUL, Presses Universitaires de Louvain}, address = {Louvain-la-Neuve}, ISBN = {978-2-87558-868-5}, } @article{volodina-etal-2019-swell-285609, title = {The SweLL Language Learner Corpus: From Design to Annotation}, abstract = {The article presents a new language learner corpus for Swedish, SweLL, and its methodology, from collection and pseudonymisation (to protect the personal information of learners) to annotation adapted to second language learning. The main aim is to deliver a well-annotated corpus of essays written by second language learners of Swedish and to make it available for research through a browsable environment. To that end, a new annotation tool and a new project management tool have been implemented, both with the main purpose of ensuring the reliability and quality of the final corpus. In the article we discuss the reasoning behind the metadata selection and the principles of gold corpus compilation, and argue for separating normalization from correction annotation.}, journal = {Northern European Journal of Language Technology}, author = {Volodina, Elena and Granstedt, Lena and Matsson, Arild and Megyesi, Beáta and Pilán, Ildikó and Prentice, Julia and Rosén, Dan and Rudebeck, Lisa and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats}, year = {2019}, volume = {6}, pages = {67--104}, } @inProceedings{volodina-etal-2019-svala-285617, title = {SVALA: an Annotation Tool for Learner Corpora generating parallel texts}, abstract = {Learner corpora are actively used for research on Language Acquisition and in Learner Corpus Research (LCR). The data is, however, very expensive to collect and manually annotate, and the process includes steps like anonymization, normalization, error annotation and linguistic annotation. In the past, projects often re-used tools from a number of different projects for the above steps. As a result, data needed to be converted between the tools' various input and output formats, which increased the complexity of the task. In the present project, we are developing a tool that handles all of the above-mentioned steps in one environment, maintaining a stable, interpretable format between the steps.
A distinguishing feature of the tool is that users work in a familiar environment (plain text), while the tool visualizes all performed edits via a graph that links the original learner text with the edited one, token by token.}, booktitle = {Learner Corpus Research Conference (LCR-2019), Warsaw, 12-14 September 2019, Book of abstracts}, author = {Volodina, Elena and Matsson, Arild and Rosén, Dan and Wirén, Mats}, year = {2019}, } @inProceedings{wiren-etal-2018-svala-285624, title = {SVALA: Annotation of Second-Language Learner Text Based on Mostly Automatic Alignment of Parallel Corpora}, abstract = {Annotation of second-language learner text is a cumbersome manual task which in turn requires interpretation to postulate the intended meaning of the learner’s language. This paper describes SVALA, a tool which separates the logical steps in this process while providing rich visual support for each of them. The first step is to pseudonymize the learner text to fulfil the legal and ethical requirements for a distributable learner corpus. The second step is to correct the text, which is carried out in the simplest possible way by text editing. During the editing, SVALA automatically maintains a parallel corpus with alignments between words in the learner source text and the corrected text, while the annotator may repair inconsistent word alignments. Finally, the actual labelling of the corrections (the postulated errors) is performed. We describe the objectives, design and workflow of SVALA, and our plans for further development.}, booktitle = {Selected papers from the CLARIN Annual Conference 2018, Pisa, 8-10 October 2018}, editor = {Inguna Skadina and Maria Eskevich}, author = {Wirén, Mats and Matsson, Arild and Rosén, Dan and Volodina, Elena}, year = {2018}, publisher = {Linköping University Electronic Press, Linköpings universitet}, address = {Linköping}, ISBN = {978-91-7685-034-3}, } @inProceedings{rosen-etal-2018-error-275363, title = {Error Coding of Second-Language Learner Texts Based on Mostly Automatic Alignment of Parallel Corpora}, abstract = {Error coding of second-language learner text, that is, detecting, correcting and annotating errors, is a cumbersome task which in turn requires interpretation of the text to decide what the errors are. This paper describes a system with which the annotator corrects the learner text by editing it prior to the actual error annotation. During the editing, the system automatically generates a parallel corpus of the learner and corrected texts. Based on this, the work of the annotator consists of three independent tasks that are otherwise often conflated: correcting the learner text, repairing inconsistent alignments, and performing the actual error annotation.}, booktitle = {Proceedings of CLARIN-2018 conference, 8-10 October 2018, Pisa, Italy}, author = {Rosén, Dan and Wirén, Mats and Volodina, Elena}, year = {2018}, } @inProceedings{volodina-etal-2018-annotation-275361, title = {Annotation of learner corpora: first SweLL insights}, abstract = {This is a concise description of experiences with learner corpus annotation performed within the SweLL project. The experiences include work with legal issues, anonymization, error annotation, normalization and questions relating to the quality of annotation.
}, booktitle = {Proceedings of SLTC 2018, Stockholm, October 7-9, 2018}, author = {Volodina, Elena and Granstedt, Lena and Megyesi, Beáta and Prentice, Julia and Rosén, Dan and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats}, year = {2018}, } @inProceedings{megyesi-etal-2018-learner-275359, title = {Learner Corpus Anonymization in the Age of GDPR: Insights from the Creation of a Learner Corpus of Swedish}, abstract = {This paper reports on the status of learner corpus anonymization for the ongoing research infrastructure project SweLL. The main project aim is to deliver and make available for research a well-annotated corpus of essays written by second language (L2) learners of Swedish. As practice shows, annotation of learner texts is a sensitive process demanding many compromises between ethical and legal demands on the one hand and research and technical demands on the other. Below is a concise description of the current status of the pseudonymization of language learner data to ensure the anonymity of the learners, with numerous examples of the above-mentioned compromises.}, booktitle = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018) at SLTC, Stockholm, 7th November 2018}, editor = {Ildikó Pilán and Elena Volodina and David Alfter and Lars Borin}, author = {Megyesi, Beáta and Granstedt, Lena and Johansson, Sofia and Prentice, Julia and Rosén, Dan and Schenström, Carl-Johan and Sundberg, Gunlög and Wirén, Mats and Volodina, Elena}, year = {2018}, publisher = {Linköping University Electronic Press}, address = {Linköping}, ISBN = {978-91-7685-173-9}, } @inProceedings{malm-etal-2018-uneek-267351, title = {Uneek: a Web Tool for Comparative Analysis of Annotated Texts}, abstract = {In this paper, we present Uneek, a web-based linguistic tool that performs set operations on raw or annotated texts. The tool may be used for automatic distributional analysis, and for disambiguating polysemy with a method that we refer to as semi-automatic uniqueness differentiation (SUDi). Uneek outputs the intersection and differences between the texts' listed attributes, e.g. POS, dependencies, word forms, frame elements. This makes it an ideal supplement to methods for lumping or splitting in frame development processes. In order to make some of Uneek’s functions clearer, we employ SUDi on a small data set containing the polysemous verb "bake". As of now, Uneek may only run two files at a time, but there are plans to develop the tool so that it may simultaneously operate on multiple files. Finally, we relate the developmental plans for added functionality to how such functions may support FrameNet work in the future.}, booktitle = {Proceedings of the LREC 2018 Workshop International FrameNet Workshop 2018: Multilingual Framenets and Constructicons, 7-12 May 2018, Miyazaki (Japan) / [ed] Tiago Timponi Torrent, Lars Borin & Collin F. Baker, 2018}, author = {Malm, Per and Ahlberg, Malin and Rosén, Dan}, year = {2018}, ISBN = {979-10-95546-04-7}, } @inProceedings{borin-etal-2016-sparv-246053, title = {Sparv: Språkbanken’s corpus annotation pipeline infrastructure}, abstract = {Sparv is Språkbanken’s corpus annotation pipeline infrastructure. The easiest way to use the pipeline is from its web interface with a plain text document. The pipeline uses in-house and external tools on the text to segment it into sentences and paragraphs, tokenise it, tag parts of speech, look up words in dictionaries and analyse compounds.
The pipeline can also be run via a web API that returns XML results, and it is run locally at Språkbanken to prepare the documents in Korp, our corpus search tool. While the most sophisticated support is for modern Swedish, the pipeline supports 15 languages.}, booktitle = {SLTC 2016. The Sixth Swedish Language Technology Conference, Umeå University, 17-18 November, 2016}, author = {Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Rosén, Dan and Schäfer, Roland and Schumacher, Anne}, year = {2016}, } @book{rosen-2016-theory-231969, title = {Theory Exploration and Inductive Theorem Proving}, abstract = {We have built two state-of-the-art inductive theorem provers named HipSpec and Hipster. The main issue when automating proofs by induction is to discover essential helper lemmas. Our theorem provers use the technique of theory exploration, a method for systematically discovering interesting conclusions about a mathematical theory. We use the existing theory exploration system QuickSpec, which conjectures properties of a program that seem to hold under testing. The idea is to try to prove these explored conjectures together with the user-stated goal conjecture. By using this idea and connecting it with our previous work on Hip, the Haskell Inductive Prover, we were able to take new leaps in the field of inductive theorem proving. Additionally, we have developed a benchmark suite named TIP, short for Tons of Inductive Problems, with benchmark problems for inductive theorem provers, and a toolbox for converting and manipulating problems expressed in the TIP format. There were two main reasons for this initiative. Firstly, the inductive theorem proving field lacked a shared benchmark suite as well as a shared format. Secondly, the benchmarks that had been used were outdated: all contemporary provers would solve almost every problem. We have so far added hundreds of new challenges to the TIP suite to encourage further research.}, author = {Rosén, Dan}, year = {2016}, publisher = {Chalmers University of Technology}, address = {Göteborg}, } @inProceedings{claessen-rosen-2015-modulo-230822, title = {SAT modulo intuitionistic implications}, abstract = {We present a new method for solving problems in intuitionistic propositional logic, which involves the use of an incremental SAT-solver. The method scales to very large problems, and fits well into an SMT-based framework for interaction with other theories.}, booktitle = {Lecture Notes in Computer Science. Proceedings of the International Conference on Logic for Programming, Artificial Intelligence and Reasoning (LPAR)}, author = {Claessen, Koen and Rosén, Dan}, year = {2015}, volume = {9450}, ISBN = {978-3-662-48898-0}, pages = {622--637}, } @inProceedings{johansson-etal-2014-hipster-216678, title = {Hipster: Integrating theory exploration in a proof assistant}, abstract = {This paper describes Hipster, a system integrating theory exploration with the proof assistant Isabelle/HOL. Theory exploration is a technique for automatically discovering new interesting lemmas in a given theory development. Hipster can be used in two main modes. The first is exploratory mode, used for automatically generating basic lemmas about a given set of datatypes and functions in a new theory development. The second is proof mode, used in a particular proof attempt, trying to discover the missing lemmas which would allow the current goal to be proved.
Hipster's proof mode complements and boosts existing proof automation techniques that rely on automatically selecting existing lemmas, by inventing new lemmas that need induction to be proved. We show example uses of both modes.}, booktitle = {Lecture Notes in Computer Science}, author = {Johansson, Moa and Rosén, Dan and Smallbone, Nicholas and Claessen, Koen}, year = {2014}, volume = {8543}, ISBN = {978-3-319-08433-6}, pages = {108--122}, } @inProceedings{claessen-etal-2015-tons-222739, title = {TIP: Tons of Inductive Problems}, abstract = {This paper describes our collection of benchmarks for inductive theorem provers. The recent surge of interest in automated inductive theorem proving has increased the demand for evaluation and comparison between systems. We expect the benchmark suite to grow continually as more problems are submitted by the community. New challenge problems will promote further development of provers, which will greatly benefit both developers and users of inductive theorem provers.}, booktitle = {Lecture Notes in Artificial Intelligence: International Conference on Intelligent Computer Mathematics (CICM)}, author = {Claessen, Koen and Johansson, Moa and Rosén, Dan and Smallbone, Nicholas}, year = {2015}, volume = {9150}, ISBN = {978-3-319-20615-8}, pages = {332--336}, } @misc{bernardy-etal-2014-compiling-195331, title = {Compiling Linear Logic using Continuations}, abstract = {As System F is the logical foundation of functional programming, it has long been expected that Classical Linear Logic (CLL) is the logical foundation of concurrent programming. In particular, thanks to an involutive negation, its language of propositions corresponds to protocols. This means that CLL provides the principles to integrate concurrency into functional programming languages. Aiming towards this integration, we translate the concurrency features of CLL into continuations, essentially via a negation-model of CLL into System F. Practically, the translation can be used to embed CLL programs into functional languages such as Haskell. We explain the properties of the interface between the two languages. In particular, using an example, we illustrate how the par (⅋) operator can solve practical functional programming problems.}, author = {Bernardy, Jean-Philippe and Rosén, Dan and Smallbone, Nicholas}, year = {2014}, } @inProceedings{rosen-smallbone-2015-tools-230830, title = {TIP: Tools for inductive provers}, abstract = {TIP is a toolbox for users and developers of inductive provers.
It consists of a large number of tools which can, for example, simplify an inductive problem, monomorphise it or find counterexamples to it. We are using TIP to help maintain a set of benchmarks for inductive theorem provers, where its main job is to encode aspects of the problem that are not natively supported by the respective provers. TIP makes it easier to write inductive provers by supplying necessary tools, such as lemma discovery, which prover authors can simply import into their own prover.}, booktitle = {Lecture Notes in Computer Science. 20th International Conference on Logic for Programming, Artificial Intelligence, and Reasoning, LPAR 2015, Suva, Fiji, 24-28 November 2015}, author = {Rosén, Dan and Smallbone, Nicholas}, year = {2015}, volume = {9450}, ISBN = {978-3-662-48898-0}, pages = {219--232}, } @inProceedings{vytiniotis-etal-2013-halo-187462, title = {HALO: Haskell to logic through denotational semantics}, abstract = {Even well-typed programs can go wrong in modern functional languages, by encountering a pattern-match failure, or simply returning the wrong answer. An increasingly popular response is to allow programmers to write contracts that express semantic properties, such as crash-freedom or some useful post-condition. We study the static verification of such contracts. Our main contribution is a novel translation to first-order logic of both Haskell programs and contracts written in Haskell, all justified by denotational semantics. This translation enables us to prove that functions satisfy their contracts using an off-the-shelf first-order logic theorem prover.}, booktitle = {Conference Record of the Annual ACM Symposium on Principles of Programming Languages}, author = {Vytiniotis, Dimitrios and Peyton-Jones, Simon L. and Rosén, Dan and Claessen, Koen}, year = {2013}, ISBN = {978-1-4503-1832-7}, pages = {431--442}, } @inProceedings{claessen-etal-2013-automating-175485, title = {Automating Inductive Proofs using Theory Exploration}, abstract = {HipSpec is a system for automatically deriving and proving properties about functional programs. It uses a novel approach, combining theory exploration, counterexample testing and inductive theorem proving. HipSpec automatically generates a set of equational theorems about the available recursive functions of a program. These equational properties make up an algebraic specification for the program and can in addition be used as a background theory for proving additional user-stated properties. Experimental results are encouraging: HipSpec compares favourably to other inductive theorem provers and theory exploration systems.}, booktitle = {24th International Conference on Automated Deduction, Lake Placid, NY, USA, June 9-14, 2013}, author = {Claessen, Koen and Rosén, Dan and Johansson, Moa and Smallbone, Nicholas}, year = {2013}, volume = {7898}, ISBN = {978-3-642-38573-5}, pages = {392--406}, }
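
Note (outside the entries above, so ignored by BibTeX): several of these records (HipSpec, Hipster, QuickSpec, TIP) rest on theory exploration: conjecturing equations about a program's functions, keeping those that survive random testing, and then attempting induction proofs of the survivors. Below is a minimal sketch in Haskell of the testing half of that loop, assuming GHC with the QuickCheck package; the property name is invented for illustration, and this is not code from any of the cited papers.

-- Illustrative sketch only (not from the cited papers): the kind of
-- equational conjecture a QuickSpec-style theory explorer generates and
-- tests before an inductive prover such as HipSpec attempts a proof.
import Test.QuickCheck

-- Candidate lemma: reversing a concatenation reverses and swaps the parts.
-- This is the classic helper lemma needed to prove
--   reverse (reverse xs) == xs
-- by structural induction on xs.
prop_revApp :: [Int] -> [Int] -> Bool
prop_revApp xs ys = reverse (xs ++ ys) == reverse ys ++ reverse xs

-- A conjecture that survives the random tests is kept as a candidate for
-- an induction proof; a falsified one is discarded with a counterexample.
main :: IO ()
main = quickCheck prop_revApp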