@inProceedings{francis-2024-variation-342620,
  title     = {Variation between Credible and Non-Credible News Across Topics},
  abstract  = {‘Fake News’ continues to undermine trust in modern journalism and politics. Despite continued efforts to study fake news, results have been conflicting. Previous attempts to analyse and combat fake news have largely focused on distinguishing fake news from truth, or differentiating between its various sub-types (such as propaganda, satire, misinformation, etc.). This paper conducts a linguistic and stylistic analysis of fake news, focusing on variation between various news topics. It builds on related work identifying features from discourse and linguistics in deception detection by analysing five distinct news topics: Economy, Entertainment, Health, Science, and Sports. The results emphasize that linguistic features vary between credible and deceptive news in each domain and highlight the importance of adapting classification tasks to accommodate variety-based stylistic and linguistic differences in order to achieve better real-world performance.},
  booktitle = {The First International Conference on Natural Language Processing and Artificial Intelligence for Cyber Security},
  author    = {Francis, Emilie},
  year      = {2024},
  publisher = {NLPAICS’2024},
  address   = {Lancaster, U.K.},
  pages     = {86--96},
}

@inProceedings{masciolini-etal-2024-synthetic-338288,
  title     = {Synthetic-Error Augmented Parsing of Swedish as a Second Language: Experiments with Word Order},
  abstract  = {Ungrammatical text poses significant challenges for off-the-shelf dependency parsers. In this paper, we explore the effectiveness of using synthetic data to improve performance on essays written by learners of Swedish as a second language. Due to their relevance and ease of annotation, we restrict our initial experiments to word order errors. To do that, we build a corrupted version of the standard Swedish Universal Dependencies (UD) treebank Talbanken, mimicking the error patterns and frequency distributions observed in the Swedish Learner Language (SweLL) corpus. We then use the MaChAmp (Massive Choice, Ample tasks) toolkit to train an array of BERT-based dependency parsers, fine-tuning on different combinations of original and corrupted data. We evaluate the resulting models not only on their respective test sets but also, most importantly, on a smaller collection of sentence-correction pairs derived from SweLL. Results show small but significant performance improvements on the target domain, with minimal decline on normative data.},
  booktitle = {Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024, May 25, 2024, Torino, Italia},
  author    = {Masciolini, Arianna and Francis, Emilie and Szawerna, Maria Irena},
  year      = {2024},
  publisher = {ELRA and ICCL},
  address   = {Torino, Italy},
  ISBN      = {978-2-493814-20-3},
}