Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

@article{themistocleous-2019-dialect-341526,
	title        = {Dialect Classification From a Single Sonorant Sound Using Deep Neural Networks},
	abstract     = {During spoken communication, the fine acoustic properties of human speech can reveal vital sociolinguistic and linguistic information about speakers and thus, these properties can function as reliable identification markers of speakers' identity. One key piece of information speech reveals is speakers' dialect. The first aim of this study is to provide a machine learning method that can distinguish the dialect from acoustic productions of sonorant sounds. The second aim is to determine the classification accuracy of dialects from the temporal and spectral information of a single sonorant sound and the classification accuracy of dialects using additional co-articulatory information from the adjacent vowel. To this end, this paper provides two classification approaches. The first classification approach aims to distinguish two Greek dialects, namely Athenian Greek, the prototypical form of Standard Modern Greek and Cypriot Greek using measures of temporal and spectral information (i.e., spectral moments) from four sonorant consonants /m n l r/. The second classification study aims to distinguish the dialects using coarticulatory information (e.g., formants frequencies F1 - F5, F0, etc.) from the adjacent vowel in addition to spectral and temporal information from sonorants. In both classification approaches, we have employed Deep Neural Networks, which we compared with Support Vector Machines, Random Forests, and Decision Trees. The findings show that neural networks distinguish the two dialects using a combination of spectral moments, temporal information, and formant frequency information with 81\% classification accuracy, which is a 14\% accuracy gain over employing temporal properties and spectral moments alone. In conclusion, Deep Neural Networks can classify the dialect from single consonant productions, making them capable of identifying sociophonetic shibboleths.},
	journal      = {Frontiers in Communication},
	author       = {Themistocleous, Charalambos},
	year         = {2019},
	volume       = {4},
}

@article{fyndanis-themistocleous-2019-there-268753,
	title        = {Are there prototypical associations between time frames and aspectual values? Evidence from {Greek} aphasia and healthy ageing},
	abstract     = {Time reference, which has been found to be selectively impaired in agrammatic aphasia, is often interwoven with grammatical aspect. A recent study on Russian aphasia found that time reference and aspect interact: Past reference was less impaired when tested within a perfective aspect context (compared to when tested within an imperfective aspect context), and reference to the non-past was less impaired when tested within an imperfective aspect context (compared to when tested within a perfective aspect context). To explain this pattern, the authors argued that there are prototypical associations between time frames and aspectual values. The present study explores the relationship between time reference and aspect focusing on Greek aphasia and healthy ageing and using a sentence completion task that crosses time reference and aspect. The findings do not support prototypical matches between different time frames and aspectual values. Building on relevant studies, we propose that patterns of performance of healthy or language-impaired speakers on constrained tasks tapping different combinations of time frames with aspectual values should reflect the relative frequency of these combinations in a given language. The analysis of the results at the individual level revealed a double dissociation, which indicates that a given time frame–aspectual value combination may be relatively easy to process for some persons with aphasia but demanding for some others.},
	journal      = {Clinical Linguistics \& Phonetics},
	author       = {Fyndanis, Valantis and Themistocleous, Charalambos},
	year         = {2019},
	volume       = {33},
	number       = {1--2},
	pages        = {191--217},
}

@inProceedings{themistocleous-kokkinakis-2019-speech-289021,
	title        = {Speech and Mild Cognitive Impairment detection},
	abstract     = {It is of great importance to detect objective markers that can enable the early and fast identification of individuals with Mild Cognitive Impairment (MCI) from healthy individuals to inform, patient care, family and treatment planning. Connected speech productions can offer such markers. This study analyses recordings from picture description tasks by Swedish individuals with MCI and healthy control individuals (HC) and shows that voice quality, periodicity, and speech rate distinguish individuals with MCI from HC.},
	booktitle    = {Proceedings of the 10th International Conference of Experimental Linguistics, 25--27 September 2019, Lisbon, Portugal},
	editor       = {Botinis, Antonis},
	author       = {Themistocleous, Charalambos and Kokkinakis, Dimitrios},
	year         = {2019},
	publisher    = {ExLing Society},
	isbn         = {978-618-84585-0-5},
}

@inProceedings{kokkinakis-etal-2019-multifaceted-278217,
	title        = {A Multifaceted Corpus for the Study of Cognitive Decline in a {Swedish} Population},
	abstract     = {A potential, early-stage diagnostic marker for neurodegenerative diseases, such as Alzheimer’s disease, is the onset of language disturbances which is often characterized by subtle word-finding difficulties, impaired spontaneous speech, slight speech hesitancy, object naming difficulties and phonemic errors. Connected speech provides valuable information in a non-invasive and easy-to-assess way for determining aspects of the severity of language impairment. Data elicitation is an established method of obtaining highly constrained samples of connected speech that allows us to study the intricate interactions between various linguistic levels and cognition. In the paper, we describe the collection and content of a corpus consisting of spontaneous Swedish speech from individuals with Mild Cognitive Impairment (MCI), with Subjective Cognitive Impairment (SCI) and healthy, age-matched controls (HC). The subjects were pooled across homogeneous subgroups for age and education, a sub-cohort from the Gothenburg-MCI study. The corpus consists of high quality audio recordings (including transcriptions) of several tasks, namely:
(i)	a picture description task – the Cookie-theft picture, an ecologically valid approximation to spontaneous discourse that has been widely used to elicitate speech from speakers with different types of language and communication disorders; 
(ii)	a read aloud task (including registration of eye movements) – where participants read a text from the IREST collection twice, both on a computer screen (while eye movements are registered), and the same text on paper;
(iii)	a complex planning task – a subset of executive functioning that tests the ability to identify, organize and carry out (complex) steps and elements that are required to achieve a goal;
(iv)	a map task – a spontaneous speech production/semi-structured conversation in which the participants are encouraged to talk about a predefined, cooperative task-oriented topic;
(v)	a semantic verbal fluency task – category animals: where participants have to produce as many words as possible from a category in a given time (60 seconds). The fluency tests require an elaborate retrieval of words from conceptual (semantic) and lexical (phonetic) memory involving specific areas of the brain in a restricted timeframe. 
All samples are produced by Swedish speakers after obtaining written consent approved by the local ethics committee. Tasks (i) and (ii) have been collected twice in a diachronically apart period of 18 months between 2016 and 2018.
The corpus represents an approximation to speech in a natural setting: The material for elicitation is controlled in the sense that the speakers are given specific tasks to talk about, and they do so in front of a microphone. The corpus may serve as a basis for many linguistic and/or speech technological investigations and has being already used for various investigations of language features.},
	booktitle    = {CLARe4 : Corpora for Language and Aging Research, 27 February – 1 March 2019, Helsinki, Finland},
	author       = {Kokkinakis, Dimitrios and Lundholm Fors, Kristina and Fraser, Kathleen and Eckerström, Marie and Horn, Greta and Themistocleous, Charalambos},
	year         = {2019},
}