@article{nautsch-etal-2017-making-258734, title = {Making Likelihood Ratios Digestible for Cross-Application Performance Assessment}, abstract = {Performance estimation is crucial to the assessment of novel algorithms and systems. In detection error tradeoff (DET) diagrams, discrimination performance is solely assessed targeting one application, where cross-application performance considers risks resulting from decisions, depending on application constraints. For the purpose of interchangeability of research results across different application constraints, we propose to augment DET curves by depicting systems regarding their support of security and convenience levels. Therefore, application policies are aggregated into levels based on verbal likelihood ratio scales, providing an easy to use concept for business-to-business communication to denote operative thresholds. We supply a reference implementation in Python, an exemplary performance assessment on synthetic score distributions, and a fine-tuning scheme for Bayes decision thresholds, when decision policies are bounded rather than fix.}, journal = {IEEE Signal Processing Letters}, author = {Nautsch, A. and Meuwly, D. and Ramos, D. and Lindh, Jonas and Busch, C.}, year = {2017}, volume = {24}, number = {10}, pages = {1552--1556}, } @article{sundqvist-etal-2016-syllable-227628, title = {Syllable Repetition vs. Finger Tapping: Aspects of Motor Timing in 100 Healthy Adults.}, abstract = {In this study we systematically compared syllable repetition and finger tapping in healthy adults, and explored possible impacts of tempi, metronome, musical experience, and age on motor timing ability. One hundred healthy adults used finger-tapping and syllable repetition to perform an isochronous pulse in three different tempi, with and without a metronome. 
Results showed that the motor timing was more accurate with finger tapping than with syllable repetition in the slowest tempo, and the motor timing ability was better with the metronome than without. Persons with musical experience showed better motor timing accuracy than persons without such experience, and the timing asynchrony increased with increasing age. The slowest tempo 90 bpm posed extra challenges to the participants. We speculate that this pattern reflects the fact that the slow tempo lies outside the 3-8 Hz syllable rate of natural speech, which in turn has been linked to theta-based oscillations in the brain.}, journal = {Motor Control}, author = {Sundqvist, Maria and Åsberg Johnels, Jakob and Lindh, Jonas and Laakso, Katja and Hartelius, Lena}, year = {2016}, volume = {20}, number = {3}, pages = {233--254}, }
@comment{NOTE(review): the export gave the page range of the Motor Control article above as 233--54, an abbreviated end page; it is written out in full as 233--254. Confirm against the publisher record.}
@book{lindh-2017-forensic-261214, title = {Forensic comparison of voices, speech and speakers : tools and methods in forensic phonetics}, abstract = {This thesis has three main objectives. The first objective (A) includes Study I, which investigates the parameter fundamental frequency (F0) and its robustness in different acoustic contexts by using different measures. The outcome concludes that using the alternative baseline as a measure will diminish the effect of low-quality recordings or varying speaking liveliness. However, both creaky voice and raised vocal effort induce intra-variation problems that are yet to be solved. The second objective (B) includes study II, III and IV. Study II investigates the differences between the results from an ear witness line-up experiment and the pairwise perceptual judgments of voice similarity performed by a large group of listeners. The study shows that humans seem to be much more focused on similarities of speech style than features connected to voice quality, even when recordings are played backwards. 
Study III investigates the differences between an automatic voice comparison system and humans’ perceptual judgments of voice similarity. The experiments’ results show that it is possible to see a correlation between how speakers were judged as more or less different using multidimensional scaling of similarity ranks compared to both the automatic system and the listeners. However, there are also differences due to the fact that human listeners include information about speech style and have difficulties weighting the parameters, i.e. ignoring them when they are contradictory. Study IV successfully investigates a new functional method for how to convert the perceptual similarity judgments made by humans and then compare those to the automatic system results within the likelihood ratio framework. It was discovered that the automatic system outperformed the naïve human listeners in this task (using a very small dataset). The third objective (C) includes study V. Study V investigates several statistical modelling techniques to calculate relevant likelihood ratios using simulations based on existing reference data in an authentic forensic case of a disputed utterance. The study presents several problems with modelling small datasets and develops methods to take into account the lack of data within the likelihood ratio framework. In summary, the thesis contains a larger historical background to forensic speaker comparison to guide the reader into the current research situation within forensic phonetics. The work further seeks to build a bridge between forensic phonetics and automatic voice recognition. Practical casework implications have been considered throughout the work on the basis of own experience as a forensic caseworker and through collaborative interaction with other parties working in the field, both in research and in forensic practice and law enforcement. 
Since 2005, the author has been involved in over 400 forensic cases and given testimony in several countries.}, author = {Lindh, Jonas}, year = {2017}, publisher = {Department of Philosophy, Linguistics, and Theory of Science, University of Gothenburg}, address = {Gothenburg}, ISBN = {978-91-629-0141-7}, }
@inProceedings{kelly-etal-2016-identifying-242814,
  title     = {Identifying Perceptually Similar Voices with a Speaker Recognition System Using Auto-Phonetic Features},
  booktitle = {17th Annual Conference of the International Speech Communication Association (Interspeech 2016). San Francisco, CA, USA. 8-12 September 2016.},
  author    = {Kelly, Finnian and Alexander, Anil and Forth, Oscar and Kent, Samuel and Lindh, Jonas and Åkesson, Joel},
  year      = {2016},
  pages     = {1567--1568},
}
@inProceedings{lindh-akesson-2016-evaluation-242811, title = {Evaluation of Software ‘Error checks’ on the SweEval2016 Corpus for Forensic Speaker Comparison}, booktitle = {Proceedings of IAFPA25. 25th Annual Conference of the International Association for Forensic Phonetics and Acoustics. York, UK 24th – 27th July 2016}, author = {Lindh, Jonas and Åkesson, Joel}, year = {2016}, pages = {57--58}, }
@inProceedings{kelly-etal-2016-automatically-242810, title = {Automatically identifying perceptually similar voices for voice parades}, booktitle = {Proceedings of IAFPA25. 25th Annual Conference of the International Association for Forensic Phonetics and Acoustics. York, UK 24th – 27th July 2016}, author = {Kelly, Finnian and Alexander, Anil and Forth, Oscar and Kent, Samuel and Lindh, Jonas and Åkesson, Joel}, year = {2016}, pages = {25--26}, }
@inProceedings{lindh-etal-2016-comparison-242808, title = {Comparison of Perceptual and ASR Results on the SweEval2016 Corpus}, booktitle = {Proceedings of IAFPA25. 25th Annual Conference of the International Association for Forensic Phonetics and Acoustics. 
York, UK 24th – 27th July 2016.}, author = {Lindh, Jonas and Åkesson, Joel and Sundqvist, Maria}, year = {2016}, pages = {110--111}, } @inProceedings{forsberg-etal-2015-forensic-222113, title = {A forensic and sociophonetic perspective on a new corpus of young urban Swedish}, booktitle = {10th UK Language Variation and Change (UKLVC) conference 1-3/9 2015, York, UK}, author = {Forsberg, Julia and Gross, Johan and Lindh, Jonas and Åkesson, Joel}, year = {2015}, } @inProceedings{lindh-2015-forensic-222514, title = {Forensic speaker comparison using machine and mind}, booktitle = {24th Annual Conference of the International Association for Forensic Phonetics and Acoustics, 8 - 10 July 2015, Leiden, Netherlands}, author = {Lindh, Jonas}, year = {2015}, } @inProceedings{lindh-2015-forensic-222517, title = {Forensic speaker comparison evaluations}, booktitle = {Proceedings of Roundtable in Forensic Linguistics 2015, September 4th- 6th, Mainz, Germany}, author = {Lindh, Jonas}, year = {2015}, } @inProceedings{forsberg-etal-2015-speaker-220340, title = {Speaker comparison evaluation using a new corpus of urban speech}, booktitle = {24th Annual Conference of the International Association for Forensic Phonetics and Acoustics, 8-10/7 2015, Leiden}, author = {Forsberg, Julia and Gross, Johan and Lindh, Jonas and Åkesson, Joel}, year = {2015}, pages = {46--47}, } @inProceedings{lindh-akesson-2014-effect-218075, title = {Effect of the Double-Filtering effect on Automatic Voice Comparison}, abstract = {In forensic casework today it is not uncommon to receive material recorded with mobile phones or other handheld recording devices. From experience we know most people do not treat recordings with as much care as a person well versed in audio technology. Especially given the varying circumstances under which the material can be recorded. 
Thus it is important we learn more about what sort of acoustic effects take place under particular conditions and how these effects can influence Automatic Voice Comparison (AVC). The current study aims at evaluating the effects of recording material consisting of what could be described as ‘doublefiltered’ sound, henceforth referred to as DF, e.g. when a phone call is recorded using a handheld recorder placed in the vicinity of the mobile device. This filtering effect constitutes sound transmitted via GSM communication (1st filter) which then passes an indeterminable distance through the air before being captured by another recording device, such as a mobile phone or handheld recorder’s microphone (2nd filter). This effect affects the energy in the signal. The energy decreases in both the low and the high frequencies, while the middle frequencies are boosted. In this study we have used a database consisting of 150 female speakers of Swedish, all students of speech and language pathology. The recordings were made in a sound treated recording booth using a setup of one computer equipped with an internal MAudio soundcard and a high quality headset microphone. Each recording consists of solicited spontaneous speech together with read speech material (Swedish standard reading passage called ‘Ett svårt fall’). Each speaker is informed and encouraged to finish the task at their own pace. Mean duration of the full recording among the speakers was 69.3 seconds (std 16 seconds).}, booktitle = {Proceedings of IAFPA 2014. 
International Association for Forensic Phonetics and Acoustics Annual Conference 31 August - 3 September 2014}, author = {Lindh, Jonas and Åkesson, Joel}, year = {2014}, pages = {2}, }
@inProceedings{hu-lindh-2014-effects-203082,
  title     = {Effects of initial sounds on the perception of Chinese disyllable tones by Swedish students of Chinese},
  abstract  = {This paper extends previous research on the effects of initial sounds on perception of Chinese disyllable tones. A perception test was performed on Swedish adult students of Chinese using disyllable words (most previous studies have been made using solely monosyllable words). The main results indicate that voiced initial sounds e.g. [l] have a strong connection to the tone confusion pattern Tone 2 perceived as Tone 3. On the contrary, a voiceless aspirated initial sound e.g. [th] is mostly connected to misidentifications between Tone 3 to Tone 2. Unvoiced unaspirated initial sounds affect tone perception heavily, especially when they occur in the second syllable of a disyllabic word.},
  booktitle = {2014 International Conference on Phonetic Research and Language Learning (ICPRLL) \& English Phonetic Conference in China (EPCC)},
  author    = {Hu, Guohua and Lindh, Jonas},
  year      = {2014},
}
@inProceedings{hughes-etal-2012-operavox-201897, title = {operAVoX - On PErson RApid VOice eXaminer}, abstract = {At present, objective analysis of voice quality using acoustic parameters is only possible within a voice laboratory using specialist hardware and software. We have developed an easy-to-use portable voice analysis and feedback application running on the Apple iPhone, iPad, or iPod Touch. OperaVOX™ combines the signal processing power, easy connectivity, user-friendly interface, high-quality microphones and portability of these handheld devices with novel acoustic voice analysis algorithms to provide a powerful voice quality measurement tool that you can carry in your pocket. 
OperaVOX™ is designed for anyone who is interested in measuring the quality of their voice, such as a patient recovering following a stroke, a professional voice user such as singers or an aspiring actor. Built into OperaVOX™ are the validated Voice Handicap Index questionnaires and the ability for the user to record their voice for acoustic and perceptual analysis both on board the device and externally in the voice laboratory. Furthermore, the user can instruct OperaVOX™ to automatically and confidentially send these data via email to their speech therapist, voice coach or researcher team. OperaVOX™ makes it easy for everyone to accurately measure changes in the quality of their voice every hour, day, or week and without having to travel to the hospital. Two versions of OperaVOX™ will soon be available on the Apple App Store, one for the general public and another for professionals such as speech and language therapists. We have also worked with world-leading University research teams both in the UK and North America to develop bespoke versions of OperaVOX™ specifically tailored for their research and clinical requirements.}, booktitle = {5th national Conference in Logopedics}, author = {Hughes, Owain Rhys and Alexander, Anil and Forth, Oscar and Lindh, Jonas}, year = {2012}, number = {5}, } @article{morrison-etal-2014-likelihood-188784, title = {Likelihood ratio calculation for a disputed-utterance analysis with limited available data}, abstract = {We present a disputed-utterance analysis using relevant data, quantitative measurements and statistical models to calculate likelihood ratios. The acoustic data were taken from an actual forensic case in which the amount of data available to train the statistical models was small and the data point from the disputed word was far out on the tail of one of the modelled distributions. 
A procedure based on single multivariate Gaussian models for each hypothesis led to an unrealistically high likelihood ratio value with extremely poor reliability, but a procedure based on Hotelling’s T2 statistic and a procedure based on calculating a posterior predictive density produced more acceptable results. The Hotelling’s T2 procedure attempts to take account of the sampling uncertainty of the mean vectors and covariance matrices due to the small number of tokens used to train the models, and the posterior-predictive-density analysis integrates out the values of the mean vectors and covariance matrices as nuisance parameters. Data scarcity is common in forensic speech science and we argue that it is important not to accept extremely large calculated likelihood ratios at face value, but to consider whether such values can be supported given the size of the available data and modelling constraints.}, journal = {Speech Communication}, author = {Morrison, Geoffrey Stewart and Lindh, Jonas and Curran, James M}, year = {2014}, volume = {58}, pages = {81--90}, } @inProceedings{lindh-akesson-2013-pilot-188837, title = {A pilot study on the effect of different phonetic acoustic input to a GMM - UBM system for voice comparison}, booktitle = {22nd conference of the International Association for Forensic Phonetics and Acoustics (IAFPA). July 21st-24th, 2013, Tampa, Florida, USA}, author = {Lindh, Jonas and Åkesson, Joel}, year = {2013}, } @inProceedings{akesson-lindh-2013-describing-188836, title = {Describing a database collection procedure for studying ‘double filtering’ effects}, booktitle = {22nd conference of the International Association for Forensic Phonetics and Acoustics (IAFPA). 
July 21st-24th, 2013, Tampa, Florida, USA}, author = {Åkesson, Joel and Lindh, Jonas}, year = {2013}, } @inProceedings{gustavsson-etal-2013-neural-177670, title = {Neural processing of voices - Familiarity}, abstract = {Brain responses to familiar and unfamiliar voices were investigated with ERPs (Event Related Potentials). Presentation of a stream of one syllable utterances from a female voice established a standard expectation, and similar samples from four other male voices where inserted as unexpected deviants in a typical mismatch paradigm. The participants were 12 students from the basic course in linguistics. Two of the deviant voices were familiar voices of their teachers. The two other deviant voices were matched (same age, sex and dialect) but unfamiliar to the participants. A typical MMN (Mismatch Negativity) was elicited, i.e. a more negative response to the deviants compared to the standards. In contrast to verbal reports, where only one participant identified any of the deviant voices, the MMN response differed on group level between familiar and unfamiliar voices. MMN to familiar voices was larger. Using teachers' voices ensured naturalistic long term exposure, but did not allow for random assignment to conditions of familiarity making the design quasi-experimental. 
Thus acoustic analysis of voice characteristics as well as follow up studies with randomized exposure to voices are needed to rule out possible confounds and establish a causal effect of voice familiarity.}, booktitle = {Proceedings of 21st International Congress on Acoustics}, author = {Gustavsson, Lisa and Kallioinen, Petter and Klintfors, Eeva and Lindh, Jonas}, year = {2013}, volume = {19}, number = {I}, pages = {060204--6}, }
@comment{NOTE(review): the export gave the pages of the entry above with a quadruple hyphen; it is normalised to the standard double-hyphen range separator. The value 060204 looks like an article number rather than a start page -- confirm against the publisher record.}
@inProceedings{lindh-etal-2012-calculating-162456, title = {Calculating the reliability of a likelihood ratio from a disputed utterance}, booktitle = {Proceedings of IAFPA2012}, author = {Lindh, Jonas and Ochoa, Felipe and Morrison, Geoffrey Stewart}, year = {2012}, volume = {21}, }
@inProceedings{morrison-etal-2012-calculating-167148, title = {Calculating the reliability of likelihood ratios: Addressing modelling problems related to small n and tails}, abstract = {In forensic speech science we are often faced with the problem of having a relatively small amount of data which is also multivariate and distributionally complex. This results in a serious problem exactly in the scenario where potentially large strengths of evidence could be obtained, i.e., when the trace data are on a tail of the distribution which models either the prosecution or defence hypothesis and a large magnitude log likelihood ratio is calculated. By definition the sampling of a distribution is sparse on its tails and this problem is compounded if the model is trained on a small amount of data – small fluctuations in the training data can lead to large changes in the calculated likelihoods on the tails and thus large changes in the calculated likelihood ratios for trace data on the tails. 
Large-magnitude calculated log likelihood ratios are therefore inherently unreliable.}, booktitle = {Proceedings of 14th Australasian International Conference on Speech Science and Technology}, author = {Morrison, Geoffrey Stewart and Ochoa, Felipe and Lindh, Jonas}, year = {2012}, volume = {14}, }
@book{borin-etal-2012-svenska-163410, title = {Svenska språket i den digitala tidsåldern}, author = {Borin, Lars and Brandt, Martha and Edlund, Jens and Lindh, Jonas and Parkvall, Mikael}, year = {2012}, publisher = {Springer}, address = {Berlin}, ISBN = {978-3-642-30831-4}, }
@comment{NOTE(review): the three ICPLA2012 entries below were exported with a placeholder abstract field whose only content was the literal text: Without abstract. The field is omitted because it carries no information; add the real abstracts if they become available.}
@inProceedings{sundqvist-etal-2012-acoustic-162452,
  title     = {Acoustic and perceptual characteristics of speech in 22q11 deletion syndrome: Measures of voice onset time and syllable durations related to articulation and prosody.},
  booktitle = {Proceedings of ICPLA2012},
  author    = {Sundqvist, Maria and Lindh, Jonas and Hartelius, Lena and Persson, Christina},
  year      = {2012},
  volume    = {14},
}
@inProceedings{akesson-etal-2012-voice-162453,
  title     = {Voice Onset Time before and after STN-surgery in patients with Parkinson’s disease},
  booktitle = {ICPLA2012},
  author    = {Åkesson, Joel and Lindh, Jonas and Hartelius, Lena and Carlsson, Emilia},
  year      = {2012},
  volume    = {14},
}
@inProceedings{laakso-etal-2012-swedish-162454,
  title     = {Swedish Test of Intelligibility (STI) – Development of computerized assessment of word and sentence intelligibility and the performance of adult control speakers},
  booktitle = {ICPLA2012},
  author    = {Laakso, Katja and Lindh, Jonas and Hartelius, Lena},
  year      = {2012},
  volume    = {14},
}
@inProceedings{gustavsson-etal-2012-neural-162455, title = {Neural processing of familiar and unfamiliar voices}, booktitle = {Proceedings of IAFPA2012}, author = {Gustavsson, Lisa and Lindh, Jonas and Kallioinen, Petter and Markelius, Marie and Ericsson, Anna and Moniri, Sadegheh Farah and Klintfors, Eeva}, year 
= {2012}, volume = {21}, } @inProceedings{lindh-morrison-2011-humans-146100, title = {Humans versus machine: forensic voice comparison on a small database of Swedish voice recordings}, abstract = {A procedure for comparing the performance of humans and machines on speaker recognition and on forensic voice comparison is proposed and demonstrated. The procedure is consistent with the new paradigm for forensic-comparison science (use of the likelihood-ratio framework and testing of the validity and reliability of the results). The use of the procedure is demonstrated using a small database of Swedish voice recordings.}, booktitle = {Proceedings of ICPhS2011}, author = {Lindh, Jonas and Morrison, Geoffrey Stewart}, year = {2011}, volume = {17}, pages = {4}, } @techreport{borin-etal-2011-metadata-142495, title = {Metadata descriptions and other interoperability standards}, abstract = {An important aim of META-NORD is to upgrade and harmonize national language resources and tools in order to make them interoperable, within languages and across languages, with respect to their data formats and as far as possible also as regards their content. Since resources and to some extent tools will remain in one location – one of a number of META-NORD centers – the preferred way of accessing and utilizing resources and tools will be through metadata and APIs, allowing the assembly of on-the-fly tool-chains made up of standardized component language technology tools, processing distributed – and in many cases interlinked – language resources in standardized formats.}, author = {Borin, Lars and Lindh, Jonas and Brandt, Martha and Olsson, Leif-Jöran}, year = {2011}, } @article{lindh-2011-peter-142484, title = {Peter French}, abstract = {The Encyclopedia of Applied Linguistics is a ground-breaking resource, spanning the entire field. Truly international in scope, it brings together contributions from the world’s most respected scholars in applied linguistics. 
Available online or as a 10-volume print set, this comprehensive print and electronic resource provides an overview of all the key areas in applied linguistics, from language learning and language policy, to qualitative methods in applied linguistics, and technology and language. Comprising over 3.5 million words, across 1,200 entries, it spans key developments and ideas in applied linguistics, historic and emerging areas of research, and includes 250 biographies of prominent figures who have helped shaped this diverse, and ever-growing field.}, journal = {The Encyclopedia of Applied Linguistics}, author = {Lindh, Jonas}, year = {2011}, pages = {2}, }
@article{lindh-2011-francis-142483, title = {Francis Nolan}, abstract = {The Encyclopedia of Applied Linguistics is a ground-breaking resource, spanning the entire field. Truly international in scope, it brings together contributions from the world’s most respected scholars in applied linguistics. Available online or as a 10-volume print set, this comprehensive print and electronic resource provides an overview of all the key areas in applied linguistics, from language learning and language policy, to qualitative methods in applied linguistics, and technology and language. Comprising over 3.5 million words, across 1,200 entries, it spans key developments and ideas in applied linguistics, historic and emerging areas of research, and includes 250 biographies of prominent figures who have helped shaped this diverse, and ever-growing field.}, journal = {The Encyclopedia of Applied Linguistics}, author = {Lindh, Jonas}, year = {2011}, pages = {2}, }
@comment{NOTE(review): the entry below was exported as an article with the proceedings name, prefixed by a duplicated short form, in the journal field. It is a conference paper, so it is typed as inProceedings and the venue is stored in booktitle. The citation key is unchanged. Confirm the exact proceedings title against the publisher record.}
@inProceedings{lindh-eriksson-2009-swedat-118616,
  title     = {The SweDat Project and Swedia Database for Phonetic and Acoustic Research},
  abstract  = {The project described here may be seen as a continuation of an earlier project, SweDia 2000, aimed at transforming the database collected in that project to a full-fledged e-science database. The database consists of recordings of Swedish dialects from 107 locations in Sweden and Swedish speaking parts of Finland. The goal of the present project is to make the material searchable in a flexible and simple way to make it available to a much wider sector of the research community than is the case at present. The database will be accessible over the Internet via user-friendly interfaces specifically designed for this type of data. Other more specialized research interfaces will also be designed to facilitate phonetic acoustic research and orientation of the database.},
  booktitle = {Proceedings of the 2009 Fifth IEEE International Conference on e-Science},
  author    = {Lindh, Jonas and Eriksson, Anders},
  year      = {2009},
  pages     = {45--49},
}
@inProceedings{lindh-2005-visual-47310, title = {Visual Acoustic vs. Aural Perceptual Speaker Identification in a Closed Set of Disguised Voices}, booktitle = {Annual conference of IAFPA, in Marrakech 2005}, author = {Lindh, Jonas}, year = {2005}, }
@inProceedings{lindh-2006-preliminary-47318, title = {Preliminary F0 Statistics and Forensic Phonetics}, booktitle = {Annual conference of IAFPA, Department of Linguistics, Göteborg University, 2006. Eds. 
Jonas Lindh and Anders Eriksson}, author = {Lindh, Jonas}, year = {2006}, }
@inProceedings{lindh-2004-acoustic-47302, title = {Acoustic and Perceptual Analysis of Discontinuities in Two TTS Concatenation Systems}, booktitle = {Proceedings of the XVIIth Swedish Phonetics Conference, Department of Linguistics, Stockholm University}, author = {Lindh, Jonas}, year = {2004}, }
@inProceedings{lindh-2002-preliminary-47286, title = {Preliminary Observations on Discontinuities in Two TTS Concatenation Systems.}, booktitle = {Proceedings of Fonetik 2002, TMH-QPSR, KTH, Stockholm}, author = {Lindh, Jonas}, year = {2002}, volume = {44(1)}, pages = {113--116}, }
@inProceedings{lindh-2005-model-47305,
  title     = {A Model-Based Experiment Towards an Emotional Synthesis},
  booktitle = {Proceedings of the XVIIIth Swedish Phonetics Conference, Department of Linguistics, Göteborg University, eds Jonas Lindh \& Anders Eriksson},
  author    = {Lindh, Jonas},
  year      = {2005},
}
@inProceedings{lindh-2005-visual-47308, title = {Visual Acoustic vs. 
Aural Perceptual Speaker Identification in a Closed Set of Disguised Voices}, booktitle = {Proceedings of the XVIIIth Swedish Phonetics Conference, Department of Linguistics, Göteborg University, eds Jonas Lindh \& Anders Eriksson}, author = {Lindh, Jonas}, year = {2005}, }
@inProceedings{lindh-2006-preliminary-47314, title = {Preliminary Descriptive F0-statistics for Young Male Speakers}, booktitle = {Papers from FONETIK 2006, Working Papers, 52, Department of Linguistics and Phonetics, Lund University}, author = {Lindh, Jonas}, year = {2006}, volume = {52}, pages = {89--92}, }
@inProceedings{lindh-2006-case-47316, title = {A Case Study of /r/ in the Västgöta Dialect}, booktitle = {Papers from FONETIK 2006, Working Papers, 52, Department of Linguistics and Phonetics, Lund University}, author = {Lindh, Jonas}, year = {2006}, volume = {52}, pages = {85--88}, }
@inProceedings{lindh-2004-handling-47298,
  title     = {Handling the ``Voiceprint'' Issue},
  booktitle = {Proceedings of the XVIIth Swedish Phonetics Conference},
  author    = {Lindh, Jonas},
  year      = {2004},
}
@inProceedings{lindh-2004-preliminary-47289, title = {Preliminary Observations on Speaker Identification in a Closed Set Using Graphic Representations of LTAS}, booktitle = {Annual conference of IAFPA, Helsinki 2004}, author = {Lindh, Jonas}, year = {2004}, }
@inProceedings{lindh-2007-voxalys-47320, title = {Voxalys- a Pedagogical Praat Plugin for Voice Analysis.}, booktitle = {Proceedings of Fonetik 2007, TMH-QPSR, KTH, Stockholm}, author = {Lindh, Jonas}, year = {2007}, volume = {50}, pages = {73--77}, }
@inProceedings{lindh-eriksson-2007-robustness-47321, title = {Robustness of Long Time Measures of Fundamental Frequency}, abstract = {In many speech technology based applications as well as in forensic phonetics it is desirable to obtain reliable estimates of a speaker’s fundamental frequency. 
We would like the measures to be accurate and reliable enough in order to be used meaningfully as a parameter in speaker identification or verification. Under optimal conditions such as when high quality studio recordings and normal speech styles are used this is often possible. In real life applications such conditions are the exception rather than the rule. The study presented here reports the result from an investigation where different measures were tested on speech material that varied with respect to speaking style, vocal effort and recording quality. Based on the results from these tests we would like to suggest a measure we call the alternative fundamental frequency baseline as the measure that is most robust with respect to the above-mentioned sources of variation. Index Terms: speaker recognition, speaker identification, fundamental frequency, F0.}, booktitle = {Proceedings of Interspeech 2007, Antwerp, Belgium}, author = {Lindh, Jonas and Eriksson, Anders}, year = {2007}, pages = {2025--2028}, }
@inProceedings{lindh-2006-statistics-99168, title = {F0 Statistics, Robustness and Measures - Implications for Forensic Speaker Identification}, booktitle = {Proceedings of The Swedish Language Technology Conference 2006}, author = {Lindh, Jonas}, year = {2006}, }
@inProceedings{lindh-2008-robustness-99174, title = {Robustness of Forced Alignment in a Forensic Context}, booktitle = {Proceedings of IAFPA2008, Lausanne, Switzerland}, author = {Lindh, Jonas}, year = {2008}, }
@inProceedings{lindh-2009-perception-99180, title = {Perception of voice similarity and the results of a voice line-up}, booktitle = {The XXIInd Swedish Phonetics Conference, Department of Linguistics, Stockholm University, 2009.}, author = {Lindh, Jonas}, year = {2009}, ISBN = {978-91-633-4892-1}, pages = {186--189}, }
@inProceedings{lindh-2009-first-99189, title = {A first step towards a text-independent speaker verification Praat plug-in using Mistral/Alize tools}, booktitle = {The XXIInd 
Swedish Phonetics Conference, Department of Linguistics, Stockholm University, 2009.}, author = {Lindh, Jonas}, year = {2009}, ISBN = {978-91-633-4892-1}, pages = {194--197}, } @inProceedings{akesson-etal-2010-post-122323, title = {Post surgery effects on VOT for Parkinson Disease STN/DBS patients}, abstract = {In this paper we discuss and analyse voice onset time (VOT) pre and post surgical treatment with deep brain stimulation (DBS) in 17 patients diagnosed with Parkinson’s disease (PD) at Sahlgrenska University Hospital in Gothenburg, Sweden. The patients were all at different stages of the disease but with the common denominator they have all undergone surgery to enhance synaptic responses through bilateral electrode implants in the subthalamic nucleus (STN) region of the brain, also known as Deep Brain Stimulation (DBS).The main focal point of the paper is to compare the pre and post surgery VOT data to see if there were any effects stemming from the STN surgery. Preliminary results for Mean VOT, Standard deviation VOT and percent of unsuccessfully produced/unmeasureable diadochokinetic syllable repetitions are presented and discussed. We found that the standard deviation decreased significantly for the consonant /p/ and this is discussed in the perspective of the ease of articulation of the different plosives. }, booktitle = {Proceedings from FONETIK 2010, Working Papers}, author = {Åkesson, Joel and Lindh, Jonas and Hartelius, Lena}, year = {2010}, volume = {54}, pages = {119--124}, } @inProceedings{lindh-eriksson-2010-voice-122326, title = {Voice similarity — a comparison between judgements by human listeners and automatic voice comparison}, abstract = {Comparison between the way human listeners judge voice similarity and how state-of-the art GMM-UBM systems for voice recognition compare voices is a little explored area of research. 
In this study groups of informants judged the similarity between voice samples taken from a set of fairly similar male voices that had previously been used in a voice line-up experiment. The result from the listening tests was then compared to the scores from a UBM-GMM automatic voice comparison system, built on the Mistral LIA_RAL open source platform. The results show a correlation between scores obtained from the automatic system and the judgements by the listeners. Listeners are, however, more sensitive to language dependent parameters or idiosyncratic phonetic features such as speaking tempo, while the system only bases its likelihood ratios on spectral similarities, i.e. timbre.}, booktitle = {Proceedings from FONETIK 2010, Working Papers}, author = {Lindh, Jonas and Eriksson, Anders}, year = {2010}, volume = {54}, pages = {63--69}, } @inProceedings{lindh-etal-2010-methodological-123919, title = {Methodological Issues in the Presentation and Evaluation of Speech Evidence in Sweden}, booktitle = {Proceedings of the 19th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Trier, Germany}, author = {Lindh, Jonas and Eriksson, Anders and Nelhans, Gustaf}, year = {2010}, number = {19}, } @inProceedings{lindh-2010-preliminary-123920, title = {Preliminary Formant Data of the Swedia Dialect Database in a Forensic Phonetic Perspective}, booktitle = {Proceedings of the 19th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Trier, Germany}, author = {Lindh, Jonas}, year = {2010}, number = {19}, } @inProceedings{lindh-2009-pick-123922, title = {Pick a Voice among Wolves, Goats and Lambs}, booktitle = {Proceedings of the 18th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Cambridge, UK}, author = {Lindh, Jonas}, year = {2009}, number = {18}, } @inProceedings{hu-lindh-2010-perceptual-125330, title = {PERCEPTUAL MISTAKES OF CHINESE TONES IN 2-SYLLABLE WORDS 
BY SWEDISH LISTENERS}, abstract = {Earlier studies on the perception of Chinese tones have almost exclusively used 1-syllable words for the listening tests (Kiriloff, 1969; Chuang, 1971; Klatt, 1973; Gandour, 1978). In these earlier studies the misperception between tone 2 and tone 3 has been shown to be the most common. However, no studies that we have found have looked at the perception of 2- syllable words besides Chuang (1971), who only used nonsense words. By tradition the teaching of Chinese as a foreign language has been concentrated on training of perception and production of tones since adult students have been shown to show particular difficulties in perceiving their difference. Experienced teachers have through tests established that this assumption is not valid when it comes to the so-called static tone. When it comes to communicating in Chinese and to be able to use the separate tones it is not enough to know the difference in 1-syllable words especially since most modern words in standard Chinese contains 2 or more. Guo (1993) has shown that the more syllables a word contains the higher ratio of misperceived tones. So far, no investigations for Swedish students have been performed. A possible hypothesis could be that Swedish listeners would perform better due to the Swedish grave and acute accents. By asking experienced teachers in Sweden, we knew that this should not be the case however. The general impressions from teachers are also that Swedish students have the largest proportion misperceptions between tone 2 and 3. To test this we conducted a listening test on 27 native speakers of Swedish (9 bilingual Chinese speakers with native ability in Swedish) on 25 Chinese 2-syllable lexical words with 15 different tone combinations. One male and one female native speaker of Chinese pronounced the words in isolation. The words were taken from a random number of 2-syllable glossary. 
Each speaker repeated the words once with 1 seconds pause in between the repetition and then 2 seconds pause before the new word. The audio was presented in high quality headphones in the student language lab at the University of Gothenburg. The participants were all second semester students of Chinese and the listening test was also an exam, which made the participants wanting to perform as well as possible. If they wanted they could repeat the sequence as many times as they until satisfied with their answer. The results show that produced tone 1 and tone 2 are confused more than 3 and 4 (tone 4 more than 3, see figure 1). However, the distribution of misperceptions seems to be rather equally distributed if we exclude the static tone (below called 0) in contradiction to earlier studies claiming misperception mostly between tone 2 and 3. However, we also notice that certain types of syllables containing different vowels are misperceived differently. The next step is to figure out if certain syllable nucleuses are more misperceived than others and in certain positions. These conclusions can in the future lead to new approaches when it comes to teaching students production and perception of tones.}, booktitle = {Proceedings of the Fourth European Conference on Tone and Intonation (TIE4)}, author = {Hu, Guohua and Lindh, Jonas}, year = {2010}, }