% BibTeX

% Journal article. Frontiers journals identify papers by article number rather
% than by issue and page range, so 205 is stored in "pages" instead of "number".
% The acronym in the title is braced so sentence-casing styles keep it upper-case.
@article{fraser-etal-2019-predicting-282807,
	title        = {Predicting {MCI} Status From Multimodal Language Data Using Cascaded Classifiers},
	abstract     = {Recent work has indicated the potential utility of automated language analysis for the detection of mild cognitive impairment (MCI). Most studies combining language processing and machine learning for the prediction of MCI focus on a single language task; here, we consider a cascaded approach to combine data from multiple language tasks. A cohort of 26 MCI participants and 29 healthy controls completed three language tasks: picture description, reading silently, and reading aloud. Information from each task is captured through different modes (audio, text, eye-tracking, and comprehension questions). Features are extracted from each mode, and used to train a series of cascaded classifiers which output predictions at the level of features, modes, tasks, and finally at the overall session level. The best classification result is achieved through combining the data at the task level (AUC = 0.88, accuracy = 0.83). This outperforms a classifier trained on neuropsychological test scores (AUC = 0.75, accuracy = 0.65) as well as the ``early fusion'' approach to multimodal classification (AUC = 0.79, accuracy = 0.70). By combining the predictions from the multimodal language classifier and the neuropsychological classifier, this result can be further improved to AUC = 0.90 and accuracy = 0.84. In a correlation analysis, language classifier predictions are found to be moderately correlated (rho = 0.42) with participant scores on the Rey Auditory Verbal Learning Test (RAVLT). The cascaded approach for multimodal classification improves both system performance and interpretability. This modular architecture can be easily generalized to incorporate different types of classifiers as well as other heterogeneous sources of data (imaging, metabolic, etc.).},
	journal      = {Frontiers in Aging Neuroscience},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Eckerström, Marie and Öhman, Fredrik and Kokkinakis, Dimitrios},
	year         = {2019},
	volume       = {11},
	pages        = {205},
}

% Conference paper (NAACL-HLT 2019). The volume editors were previously embedded
% in the booktitle text; they now live in a proper "editor" field so styles can
% format them. Conference date and venue stay in booktitle; "address" is the
% publisher's location.
@inproceedings{fraser-etal-2019-multilingual-280280,
	title        = {Multilingual prediction of {Alzheimer’s} disease through domain adaptation and concept-based language modelling},
	abstract     = {There is growing evidence that changes in speech and language may be early markers of dementia, but much of the previous NLP work in this area has been limited by the size of the available datasets. Here, we compare several methods of domain adaptation to augment a small French dataset of picture descriptions (n = 57) with a much larger English dataset (n = 550), for the task of automatically distinguishing participants with dementia from controls. The first challenge is to identify a set of features that transfer across languages; in addition to previously used features based on information units, we introduce a new set of features to model the order in which information units are produced by dementia patients and controls. These concept-based language model features improve classification performance in both English and French separately, and the best result (AUC = 0.89) is achieved using the multilingual training set with a combination of information and language model features.},
	booktitle    = {Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers), June 2--7, 2019, Minneapolis, Minnesota},
	editor       = {Burstein, Jill and Doran, Christy and Solorio, Thamar},
	author       = {Fraser, Kathleen and Linz, Nicklas and Lundholm Fors, Kristina and Rudzicz, Frank and König, Alexandra and Alexandersson, Jan and Robert, Philippe and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {Association for Computational Linguistics},
	address      = {Stroudsburg, PA},
	ISBN         = {978-1-950737-13-0},
}

% Conference paper (ExLing 2019). The editor name uses the unambiguous
% "Last, First" form, matching the author fields throughout this file.
@inproceedings{lundholmfors-etal-2019-reading-284036,
	title        = {Reading and mild cognitive impairment},
	abstract     = {In the present study, we investigated the discriminatory power of eye-tracking features in distinguishing between individuals with mild cognitive impairment (MCI) and healthy controls (HC). The eye movements of the study participants were recorded at two different time points, 18 months apart. Using a machine learning approach with leave-one-out cross-validation, we were able to discriminate between the groups with 73.6 AUC. However, somewhat surprisingly the classification was less successful using data from the second recording session, which might be attributed to the non-static nature of cognitive status. Still, the outcome suggests that eye-tracking measures can be exploited as useful markers of MCI.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25--27 September 2019, Lisbon, Portugal},
	editor       = {Botinis, Antonis},
	author       = {Lundholm Fors, Kristina and Antonsson, Malin and Kokkinakis, Dimitrios and Fraser, Kathleen},
	year         = {2019},
	ISBN         = {978-618-84585-0-5},
}

% Conference contribution (CLARe4, Helsinki 2019) describing the corpus itself.
% The abstract is a multi-line value listing the five elicitation tasks (i)-(v).
% "Swedish" and the workshop acronym are braced so styles cannot change their case.
@inproceedings{kokkinakis-etal-2019-multifaceted-278217,
	title        = {A Multifaceted Corpus for the Study of Cognitive Decline in a {Swedish} Population},
	abstract     = {A potential, early-stage diagnostic marker for neurodegenerative diseases, such as Alzheimer’s disease, is the onset of language disturbances which is often characterized by subtle word-finding difficulties, impaired spontaneous speech, slight speech hesitancy, object naming difficulties and phonemic errors. Connected speech provides valuable information in a non-invasive and easy-to-assess way for determining aspects of the severity of language impairment. Data elicitation is an established method of obtaining highly constrained samples of connected speech that allows us to study the intricate interactions between various linguistic levels and cognition. In the paper, we describe the collection and content of a corpus consisting of spontaneous Swedish speech from individuals with Mild Cognitive Impairment (MCI), with Subjective Cognitive Impairment (SCI) and healthy, age-matched controls (HC). The subjects were pooled across homogeneous subgroups for age and education, a sub-cohort from the Gothenburg-MCI study. The corpus consists of high quality audio recordings (including transcriptions) of several tasks, namely:
(i)	a picture description task – the Cookie-theft picture, an ecologically valid approximation to spontaneous discourse that has been widely used to elicitate speech from speakers with different types of language and communication disorders;
(ii)	a read aloud task (including registration of eye movements) – where participants read a text from the IREST collection twice, both on a computer screen (while eye movements are registered), and the same text on paper;
(iii)	a complex planning task – a subset of executive functioning that tests the ability to identify, organize and carry out (complex) steps and elements that are required to achieve a goal;
(iv)	a map task – a spontaneous speech production/semi-structured conversation in which the participants are encouraged to talk about a predefined, cooperative task-oriented topic;
(v)	a semantic verbal fluency task – category animals: where participants have to produce as many words as possible from a category in a given time (60 seconds). The fluency tests require an elaborate retrieval of words from conceptual (semantic) and lexical (phonetic) memory involving specific areas of the brain in a restricted timeframe.
All samples are produced by Swedish speakers after obtaining written consent approved by the local ethics committee. Tasks (i) and (ii) have been collected twice in a diachronically apart period of 18 months between 2016 and 2018.
The corpus represents an approximation to speech in a natural setting: The material for elicitation is controlled in the sense that the speakers are given specific tasks to talk about, and they do so in front of a microphone. The corpus may serve as a basis for many linguistic and/or speech technological investigations and has being already used for various investigations of language features.},
	booktitle    = {{CLARe4}: Corpora for Language and Aging Research, 27 February -- 1 March 2019, Helsinki, Finland},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Fraser, Kathleen and Eckerström, Marie and Horn, Greta and Themistocleous, Charalambos},
	year         = {2019},
}

% Journal article. Percent signs in the abstract are escaped: BibTeX copies the
% field verbatim, and a bare percent sign would start a TeX comment (swallowing
% the rest of the line) in any style that prints the abstract.
@article{fraser-etal-2019-multilingual-270713,
	title        = {Multilingual word embeddings for the assessment of narrative speech in mild cognitive impairment},
	abstract     = {We analyze the information content of narrative speech samples from individuals with mild cognitive impairment (MCI), in both English and Swedish, using a combination of supervised and unsupervised learning techniques. We extract information units using topic models trained on word embeddings in monolingual and multilingual spaces, and find that the multilingual approach leads to significantly better classification accuracies than training on the target language alone. In many cases, we find that augmenting the topic model training corpus with additional clinical data from a different language is more effective than training on additional monolingual data from healthy controls. Ultimately we are able to distinguish MCI speakers from healthy older adults with accuracies of up to 63\% (English) and 72\% (Swedish) on the basis of information content alone. We also compare our method against previous results measuring information content in Alzheimer's disease, and report an improvement over other topic-modeling approaches. Furthermore, our results support the hypothesis that subtle differences in language can be detected in narrative speech, even at the very early stages of cognitive decline, when scores on screening tools such as the Mini-Mental State Exam are still in the “normal” range.},
	journal      = {Computer Speech and Language},
	author       = {Fraser, Kathleen and Lundholm Fors, Kristina and Kokkinakis, Dimitrios},
	year         = {2019},
	volume       = {53},
	pages        = {121--139},
}
% Sidansvarig (page maintainer): sb-webb