% Conference contribution by Lindh and Åkesson (IAFPA 2014).
% NOTE(review): pages = {2} may be a page count rather than a page range -- confirm against the proceedings.
@inproceedings{lindh-akesson-2014-effect-218075,
  author    = {Lindh, Jonas and Åkesson, Joel},
  title     = {Effect of the Double-Filtering effect on Automatic Voice Comparison},
  booktitle = {Proceedings of {IAFPA} 2014. International Association for Forensic Phonetics and Acoustics Annual Conference 31 August -- 3 September 2014},
  year      = {2014},
  pages     = {2},
  abstract  = {In forensic casework today it is not uncommon to receive material recorded with mobile phones or other handheld recording devices. From experience we know most people do not treat recordings with as much care as a person well versed in audio technology. Especially given the varying circumstances under which the material can be recorded. Thus it is important we learn more about what sort of acoustic effects take place under particular conditions and how these effects can influence Automatic Voice Comparison (AVC). The current study aims at evaluating the effects of recording material consisting of what could be described as ‘doublefiltered’ sound, henceforth referred to as DF, e.g. when a phone call is recorded using a handheld recorder placed in the vicinity of the mobile device. This filtering effect constitutes sound transmitted via GSM communication (1st filter) which then passes an indeterminable distance through the air before being captured by another recording device, such as a mobile phone or handheld recorder’s microphone (2nd filter). This effect affects the energy in the signal. The energy decreases in both the low and the high frequencies, while the middle frequencies are boosted. In this study we have used a database consisting of 150 female speakers of Swedish, all students of speech and language pathology. The recordings were made in a sound treated recording booth using a setup of one computer equipped with an internal MAudio soundcard and a high quality headset microphone. Each recording consists of solicited spontaneous speech together with read speech material (Swedish standard reading passage called ‘Ett svårt fall’). Each speaker is informed and encouraged to finish the task at their own pace. Mean duration of the full recording among the speakers was 69.3 seconds (std 16 seconds).},
}

% Conference paper by Hu and Lindh (ICPRLL / EPCC 2014).
% The ampersand in booktitle is escaped so LaTeX does not treat it as an alignment tab.
@inproceedings{hu-lindh-2014-effects-203082,
  author    = {Hu, Guohua and Lindh, Jonas},
  title     = {Effects of initial sounds on the perception of {Chinese} disyllable tones by {Swedish} students of {Chinese}},
  booktitle = {2014 International Conference on Phonetic Research and Language Learning ({ICPRLL}) \& English Phonetic Conference in China ({EPCC})},
  year      = {2014},
  abstract  = {This paper extends previous research on the effects of initial sounds on perception of Chinese disyllable tones. A perception test was performed on Swedish adult students of Chinese using disyllable words (most previous studies have been made using solely monosyllable words). The main results indicate that voiced initial sounds e.g. [l] have a strong connection to the tone confusion pattern Tone 2 perceived as Tone 3. On the contrary, a voiceless aspirated initial sound e.g. [th] is mostly connected to misidentifications between Tone 3 to Tone 2. Unvoiced unaspirated initial sounds affect tone perception heavily, especially when they occur in the second syllable of a disyllabic word.},
}

% Journal article by Morrison, Lindh and Curran (Speech Communication, vol. 58).
@article{morrison-etal-2014-likelihood-188784,
  author   = {Morrison, Geoffrey Stewart and Lindh, Jonas and Curran, James M.},
  title    = {Likelihood ratio calculation for a disputed-utterance analysis with limited available data},
  journal  = {Speech Communication},
  year     = {2014},
  volume   = {58},
  pages    = {81--90},
  abstract = {We present a disputed-utterance analysis using relevant data, quantitative measurements and statistical models to calculate likelihood ratios. The acoustic data were taken from an actual forensic case in which the amount of data available to train the statistical models was small and the data point from the disputed word was far out on the tail of one of the modelled distributions. A procedure based on single multivariate Gaussian models for each hypothesis led to an unrealistically high likelihood ratio value with extremely poor reliability, but a procedure based on Hotelling’s $T^2$ statistic and a procedure based on calculating a posterior predictive density produced more acceptable results. The Hotelling’s $T^2$ procedure attempts to take account of the sampling uncertainty of the mean vectors and covariance matrices due to the small number of tokens used to train the models, and the posterior-predictive-density analysis integrates out the values of the mean vectors and covariance matrices as nuisance parameters. Data scarcity is common in forensic speech science and we argue that it is important not to accept extremely large calculated likelihood ratios at face value, but to consider whether such values can be supported given the size of the available data and modelling constraints.},
}