% Conference paper (EMNLP 2023 proceedings) presenting the TCFLE-8 corpus of
% French learner essays and its use for Automated Essay Scoring.
% Braces in the title protect words that must keep their capitalisation under
% sentence-casing bibliography styles.
@inproceedings{wilkens-etal-2023-tcfle-337441,
  author    = {Wilkens, Rodrigo and Pintard, Alice and Alfter, David and Folny, Vincent and François, Thomas},
  title     = {{TCFLE-8}: a Corpus of Learner Written Productions for {French} as a Foreign Language and its Application to Automated Essay Scoring},
  booktitle = {{EMNLP} 2023 - 2023 Conference on Empirical Methods in Natural Language Processing, Proceedings},
  year      = {2023},
  isbn      = {9798891760608},
  abstract  = {Automated Essay Scoring (AES) aims to automatically assess the quality of essays. Automation enables large-scale assessment, improvements in consistency, reliability, and standardization. Those characteristics are of particular relevance in the context of language certification exams. However, a major bottleneck in the development of AES systems is the availability of corpora, which, unfortunately, are scarce, especially for languages other than English. In this paper, we aim to foster the development of AES for French by providing the TCFLE-8 corpus, a corpus of 6.5k essays collected in the context of the Test de Connaissance du Français (TCF - French Knowledge Test) certification exam. We report the strict quality procedure that led to the scoring of each essay by at least two raters according to the levels of the Common European Framework of Reference for Languages (CEFR) and to the creation of a balanced corpus. In addition, we describe how linguistic properties of the essays relate to the learners' proficiency in TCFLE-8. We also advance the state-of-the-art performance for the AES task in French by experimenting with two strong baselines (i.e., RoBERTa and feature-based). Finally, we discuss the challenges of AES using TCFLE-8.},
}