Språkbanken Text is a part of Språkbanken.
BibTeX

@inproceedings{ljunglof-2010-trik-130134,
	title        = {{TRIK}: A Talking and Drawing Robot for Children with Communication Disabilities},
	abstract     = {In this project we have developed and evaluated a setup involving a touch-screen computer with a dynamic screen software, and a drawing robot, which can communicate with each other via spoken language. The purpose is to help children with severe communication disabilities to learn language, language use and cooperation, in a playful and inspiring way.  

The communication board speaks and the robot is able to understand and talk back. This encourages the child to use language and learn to cooperate to reach a common goal, which in this case is to get the robot to draw figures on a paper.  

The robot has been tested on three children, two with cerebral palsy and one with autism spectrum disorder. During this session we present the preliminary results.},
	booktitle    = {ISAAC-2010, 14th Biennial Conference for Augmentative and Alternative Communication},
	author       = {Ljunglöf, Peter},
	year         = {2010},
}

@article{borin-forsberg-2010-beyond-129125,
	title        = {Beyond the synset: {Swesaurus} – a fuzzy {Swedish} wordnet},
	journal      = {Re-thinking synonymy: semantic sameness and similarity in languages and their description},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2010},
}

@inproceedings{kokkinakis-gerdin-2010-swedish-113194,
	title        = {A {Swedish} Scientific Medical Corpus for Terminology Management and Linguistic Exploration},
	abstract     = {This paper describes the development of a new Swedish scientific medical corpus. We provide a detailed description of the characteristics of this new collection as well results for a number of term management tasks, including terminology validation and terminology extraction based on this material. Although the corpus is representative for the scientific medical domain it still covers a lot of specialised sub-disciplines such as “diabetes” and “osteoporosis” which makes it suitable for facilitating the production of smaller and more focused subcorpora. We have tried to address this issue by making explicit some features of the corpus in order to demonstrate the corpus usefulness particularly for the quality assessment of official terminologies such as the Systematized NOmenclature of MEDicine - Clinical Terms (SNOMED CT).},
	booktitle    = {Proceedings of the 7th international conference on Language Resources and Evaluation (LREC), Malta},
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2010},
}

@inproceedings{wittenburg-etal-2010-resource-118909,
	author       = {Wittenburg, Peter and Bel, Nuria and Borin, Lars and Budin, Gerhard and Calzolari, Nicoletta and Hajicova, Eva and Koskenniemi, Kimmo and Lemnitzer, Lothar and Mægaard, Bente and Piasecki, Maciej and Pierrel, Jean-Marie and Piperidis, Stelios and Skadina, Inguna and Tufis, Dan and van Veenendal, Remco and Váradi, Tamás and Wynne, Martin},
	title        = {Resource and service centres as the backbone for a sustainable service infrastructure},
	booktitle    = {Proceedings of LREC 2010},
	year         = {2010},
}

@incollection{borin-kokkinakis-2010-literary-124517,
	author       = {Borin, Lars and Kokkinakis, Dimitrios},
	title        = {Literary onomastics and language technology},
	booktitle    = {Literary education and digital learning},
	pages        = {53--78},
	publisher    = {Information Science Reference},
	address      = {Hershey - New York},
	year         = {2010},
	isbn         = {978-1-60566-932-8},
}

@article{borin-2010-zipf-130257,
	title        = {Med {Zipf} mot framtiden - en integrerad lexikonresurs för svensk språkteknologi},
	journal      = {LexicoNordica},
	author       = {Borin, Lars},
	year         = {2010},
	volume       = {17},
	pages        = {35--54},
}

@book{volodina-2010-corpora-127225,
	title        = {Corpora in Language Classroom: Reusing {Stockholm Umeå Corpus} in a vocabulary exercise generator},
	abstract     = {Authentic examples as teaching material are not easy to obtain. Corpora are able to solve this problem, as has been witnessed before. Most experiments with corpora in language classroom describe concordances. However, there are numerous other ways of bringing corpora into language education, as shown in this research. A selective learner-oriented exercise generator has been implemented on the basis of Stockholm Umeå Corpus (SUC). SUC texts have been tested for readability and levels were assigned. This generator assists in automatic selection of authentic examples of appropriate learner levels as well as in construction of wordbank-, multiple choice items and c-tests for a specified proficiency level, frequency band and word class. In Vocabulary Size Test potential words are generated on the basis of existing morphemes and SUC-based frequency lists. Interesting practical and theoretical questions connected with reusage of corpora in an exercise generator are described in this book. The research might inspire computational linguists, language teachers and everyone interested in Computer-Assisted Language Learning and Corpus Linguistics to test similar techniques in their practices. },
	author       = {Volodina, Elena},
	year         = {2010},
	publisher    = {Lambert Academic Publishing},
	address      = {Saarbrücken},
	isbn         = {978-3-8433-5256-7},
}

@inproceedings{kokkinakis-2010-korpus-119444,
	title        = {Korpus för vårdens och omsorgens fackspråk},
	abstract     = {Inom ramen för regeringens satsning ”Nationell IT-strategi för vård och omsorg” har Socialstyrelsen fått i uppdrag att översätta och anpassa begreppssystemet ’the Systematized Nomenclature of Medicine, Clinical Terms’ till svenska. Med hjälp av Läkartidningens digitala arkiv har vi utvecklat metoder för att effektivisera kvalitetssäkringen av terminnehållet. },
	booktitle    = {Humanistdagen 2010 - humaniora i dagens samhälle},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@inproceedings{wilhelmsson-2010-automatisk-247440,
	author       = {Wilhelmsson, Kenneth},
	title        = {Automatisk generering av frågor som svensk text besvarar: ett informationssystem},
	booktitle    = {Röster från Humanisten 2010},
	year         = {2010},
	abstract     = {Vilken information kan en text sägas innehålla? Ett enkelt svar är ”de frågor som den besvarar.” I vilken grad går det i så fall att automatiskt generera dessa frågor och därmed programmera ett frågebesvarande informationssystem för svensk text?},
}

@incollection{borin-2010-avtryck-136656,
	title        = {Avtryck från {WGLN-projekten} i forskningen},
	booktitle    = {Kunskapens nya världar},
	author       = {Borin, Lars},
	year         = {2010},
	publisher    = {Uppsala universitet, Uppsala Learning Lab},
	address      = {Uppsala},
	isbn         = {978-91-506-2189-1},
	pages        = {127--133},
}

@inproceedings{forsbom-wilhelmsson-2010-revision-259876,
	title        = {Revision of Part-of-Speech Tagging in {Stockholm Umeå Corpus} 2.0},
	abstract     = {Many parsers use a part-of-speech tagger as a first step in parsing. The accuracy of the tagger naturally affects the performance of the parser. In this experiment, we revise 1500+ proposed errors in SUC 2.0 that were mainly found during work with schema parsing, and evaluate tagger instances trained on the revised corpus. The revisions turned out to be beneficial also for the taggers.},
	booktitle    = {Proceedings of the Third Swedish Language Technology Conference (SLTC), Linköping, Sverige},
	author       = {Forsbom, Eva and Wilhelmsson, Kenneth},
	year         = {2010},
}

@inproceedings{kokkinakis-toporowskagronostaj-2010-linking-119441,
	title        = {Linking {SweFN++} with Medical Resources, towards a {MedFrameNet} for {Swedish}},
	abstract     = {In this pilot study we define and apply a methodology for building an event extraction system for the Swedish scientific medical and clinical language. Our aim is to find and describe linguistic expressions which refer to medical events, such as events related to diseases, symptoms and drug effects. In order to achieve this goal we have initiated actions that aim to extend and refine parts of the ongoing compilation of the Swedish FrameNet++ (SFN++), which, as its English original predecessor, is grounded in Frame Semantics which provides a sound theoretical ground for modeling and linking linguistic structures encountered in general language and in specific domains (after specialization). Using such resource we manually annotate domain texts to be used as training data for automatic event extraction by automated techniques.},
	booktitle    = {Proceedings of the Second Louhi Workshop on Text and Data Mining of Health Documents. A NAACL-HLT Workshop},
	author       = {Kokkinakis, Dimitrios and Toporowska Gronostaj, Maria},
	year         = {2010},
}

@inproceedings{kokkinakis-2010-data-130212,
	title        = {Is data scrubbing useful for anonymizing sensitive data?},
	abstract     = {The release of individual data for research, public health planning, health care statistics, monitoring of diagnostic tests, automated data collection for health care registries and tracking disease outbreaks are some of the areas in which the protection of Personal Health Information (PHI) has become an important concern. The purpose of this study is to adapt and apply synergetic methods to document de-identification, particularly in the clinical setting. The main challenge is to retain important concepts and PHI in the documents in a standardized and neutral manner as means of encryption without violating the integrity of the PHI and without sacrificing the quality and intended meaning of the authors.},
	booktitle    = {the Third Swedish Language Technology Conference},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@article{kokkinakis-2010-data-130213,
	title        = {Är ”data scrubbing” en användbar metod för att anonymisera känsliga patientdata?},
	abstract     = {De senaste årens ökande användning av modern informationsteknik inom sjukvården har medfört en kraftig ökning av elektronisk dokumentation som rör patientens hälsotillstånd, vård och behandling. Vårddokumentationen blir både mer detaljerad och mer individuell, samtidigt som den uppdateras och förändras regelbundet. Patientjournalen är i första hand till för att bidra till en god och säker vård av patienten, men också en viktig informationskälla för FoU. Ett stort hinder för utnyttjandet av journalinformation som forskningskälla är de etiska och rättsliga problemen. För att kunna hantera och utnyttja dessa stora och ständigt växande informationsmängder ställs därmed högre krav på säker, skyddad och effektiv informationshantering.},
	journal      = {Svenska Läkaresällskapets Riksstämman},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@article{kokkinakis-gerdin-2010-lakartidningens-120480,
	title        = {{Läkartidningens} arkiv i en ny skepnad - En resurs för forskare, läkare och allmänhet},
	abstract     = {I Sverige har det tagits fram en medicinsk korpus baserad på Läkartidningens digitala arkiv. Denna resurs möjliggör precisa sökningar
och värdefull tillgång till medicinsk terminologisk information på olika nivåer. Dimitrios Kokkinakis från Göteborgs universitet
och Ulla Gerdin från Socialstyrelsen presenterar projektet.
},
	journal      = {Språkbruk},
	author       = {Kokkinakis, Dimitrios and Gerdin, Ulla},
	year         = {2010},
	number       = {1},
	pages        = {22--28},
}

@inproceedings{lindh-etal-2010-methodological-123919,
	title        = {Methodological Issues in the Presentation and Evaluation of Speech Evidence in {Sweden}},
	booktitle    = {Proceedings of the 19th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Trier, Germany},
	author       = {Lindh, Jonas and Eriksson, Anders and Nelhans, Gustaf},
	year         = {2010},
}

@article{kokkinakis-2010-complementary-125644,
	author       = {Kokkinakis, Dimitrios},
	title        = {Complementary Methods for De-identifying Sensitive Data with a focus on Clinical Discourse},
	journal      = {Revista de Procesamiento de Lenguaje Natural (SEPLN)},
	volume       = {45},
	pages        = {243--246},
	year         = {2010},
	abstract     = {In the era of the Electronic Health Record (EHR) the release of individual data for research, public health planning, health care statistics, monitoring of diagnostic tests, automated data collection for health care registries and tracking disease outbreaks are some of the areas in which the protection of Personal Health Information (PHI) has become an important concern. The purpose of this study is to adapt and apply synergetic methods to document de-identification, particularly clinical, or other sources of sensitive data. The main challenge and goal of this research is to retain important concepts and PHI in the documents in a standardized and neutral manner as means of encryption without violating the integrity of the PHI and without sacrificing the quality and intended meaning of the authors.},
}

@inproceedings{ljunglof-2010-grasp-130137,
	title        = {{GRASP}: Grammar-based Language Learning},
	abstract     = {We are developing a pedagogical tool to support language learning and training for children with communicative disabilities. The system has a graphical interface, where the user can move, replace, add, and in other ways modify, words or phrases. The system keeps the sentence grammatical, by automatically rearranging the words and changing inflection, if necessary. In this way we hope that the system stimulates the child to explore the possibilities of language.},
	booktitle    = {SLTC-2010, 3rd Swedish Language Technology Conference},
	author       = {Ljunglöf, Peter},
	year         = {2010},
}

@inproceedings{hu-lindh-2010-perceptual-125330,
	title        = {Perceptual Mistakes of {Chinese} Tones in 2-Syllable Words by {Swedish} Listeners},
	abstract     = {Earlier studies on the perception of Chinese tones have almost exclusively used 1-syllable
words for the listening tests (Kiriloff, 1969; Chuang, 1971; Klatt, 1973; Gandour, 1978). In
these earlier studies the misperception between tone 2 and tone 3 has been shown to be the
most common. However, no studies that we have found have looked at the perception of 2-syllable
words besides Chuang (1971), who only used nonsense words.
By tradition the teaching of Chinese as a foreign language has been concentrated on training
of perception and production of tones since adult students have been shown to show particular
difficulties in perceiving their difference. Experienced teachers have through tests established
that this assumption is not valid when it comes to the so-called static tone. When it comes to
communicating in Chinese and to be able to use the separate tones it is not enough to know
the difference in 1-syllable words especially since most modern words in standard Chinese
contains 2 or more. Guo (1993) has shown that the more syllables a word contains the higher
ratio of misperceived tones.
So far, no investigations for Swedish students have been performed. A possible hypothesis
could be that Swedish listeners would perform better due to the Swedish grave and acute
accents. By asking experienced teachers in Sweden, we knew that this should not be the case
however. The general impressions from teachers are also that Swedish students have the
largest proportion misperceptions between tone 2 and 3. To test this we conducted a listening
test on 27 native speakers of Swedish (9 bilingual Chinese speakers with native ability in
Swedish) on 25 Chinese 2-syllable lexical words with 15 different tone combinations. One
male and one female native speaker of Chinese pronounced the words in isolation. The words
were taken from a random number of 2-syllable glossary. Each speaker repeated the words
once with 1 seconds pause in between the repetition and then 2 seconds pause before the new
word. The audio was presented in high quality headphones in the student language lab at the
University of Gothenburg. The participants were all second semester students of Chinese and
the listening test was also an exam, which made the participants wanting to perform as well as
possible. If they wanted they could repeat the sequence as many times as they until satisfied
with their answer.
The results show that produced tone 1 and tone 2 are confused more than 3 and 4 (tone 4 more
than 3, see figure 1). However, the distribution of misperceptions seems to be rather equally
distributed if we exclude the static tone (below called 0) in contradiction to earlier studies
claiming misperception mostly between tone 2 and 3. However, we also notice that certain
types of syllables containing different vowels are misperceived differently. The next step is to
figure out if certain syllable nucleuses are more misperceived than others and in certain
positions. These conclusions can in the future lead to new approaches when it comes to
teaching students production and perception of tones.},
	booktitle    = {Proceedings of the Fourth European Conference on Tone and Intonation (TIE4)},
	author       = {Hu, Guohua and Lindh, Jonas},
	year         = {2010},
}

@inproceedings{lindh-eriksson-2010-voice-122326,
	author       = {Lindh, Jonas and Eriksson, Anders},
	title        = {Voice similarity — a comparison between judgements by human listeners and automatic voice comparison},
	booktitle    = {Proceedings from FONETIK 2010, Working Papers},
	volume       = {54},
	pages        = {63--69},
	year         = {2010},
	abstract     = {Comparison between the way human listeners judge voice similarity and how state-of-the art GMM-UBM systems for voice recognition compare voices is a little explored area of research. In this study groups of informants judged the similarity between voice samples taken from a set of fairly similar male voices that had previously been used in a voice line-up experiment. The result from the listening tests was then compared to the scores from a UBM-GMM automatic voice
comparison system, built on the Mistral LIA_RAL open source platform. The results show a correlation between scores obtained from the automatic system and the judgements by the listeners. Listeners are, however, more sensitive to language dependent parameters or idiosyncratic phonetic features such as speaking tempo, while the system only bases its likelihood ratios on spectral similarities, i.e. timbre.},
}

@inproceedings{akesson-etal-2010-post-122323,
	title        = {Post surgery effects on {VOT} for {Parkinson} Disease {STN/DBS} patients},
	abstract     = {In this paper we discuss and analyse voice onset time (VOT) pre and post surgical treatment with deep brain stimulation (DBS) in 17 patients diagnosed with Parkinson’s disease (PD) at Sahlgrenska University Hospital in Gothenburg, Sweden. The patients were all at different stages of the disease but
with the common denominator they have all undergone surgery to enhance synaptic responses through bilateral electrode implants in the subthalamic nucleus (STN) region of the brain, also known as Deep Brain Stimulation (DBS).The main focal point of the paper is to compare the pre and post surgery VOT data to see if there were any effects stemming from the STN surgery. Preliminary results for Mean VOT, Standard deviation VOT and percent of unsuccessfully produced/unmeasureable diadochokinetic syllable repetitions are presented and discussed. We found that the standard deviation decreased significantly for the consonant /p/ and this is discussed in the perspective of the ease of articulation of the different plosives.
},
	booktitle    = {Proceedings from FONETIK 2010, Working Papers},
	author       = {Åkesson, Joel and Lindh, Jonas and Hartelius, Lena},
	year         = {2010},
	volume       = {54},
	pages        = {119--124},
}

@article{wilhelmsson-2010-automatisk-137859,
	title        = {Automatisk generering av frågor som svensk text besvarar: ett informationssystem},
	abstract     = {Vilken information kan en text sägas innehålla? Ett enkelt svar är ”de frågor som den besvarar.” I vilken grad går det i så fall att automatiskt generera dessa frågor och därmed programmera ett frågebesvarande informationssystem för svensk text?

Ett prototypsystem för denna uppgift har skapats som en del av ett avhandlingsprojekt inom språkteknologi. Det vore till exempel möjligt att vidareutveckla det system som här visas till en allmän teknisk tjänst, t.ex. webbaserad, som ger användare möjlighet att söka efter information med naturligt språk i en valfri digital text.

Denna text tar upp de allmänna förutsättningarna för automatisk generering av de frågor som en svensk text besvarar. Själva den teoretiska uppgiften har egenskaper som kan sägas vara lingvistiska eller informationsteoretiska. För att skapa det program som här beskrivs har dessutom naturligtvis en programmeringsinsats krävts, men denna kommer inte att tas upp här, den rent praktiska sidan av uppgiften är möjlig att lösa på många sätt.

http://www.hum.gu.se/samverkan/popularvetenskap/roster-fran-humanisten-2010/

http://hdl.handle.net/2320/7176
},
	journal      = {Röster från Humanisten, 2010},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	volume       = {2010},
	url          = {http://hdl.handle.net/2320/7176},
}

@inproceedings{allvin-etal-2010-characteristics-120479,
	title        = {Characteristics and Analysis of {Finnish} and {Swedish} Clinical Intensive Care Nursing Narratives},
	abstract     = {We present a comparative study of Finnish and Swedish free-text nursing narratives from intensive care. Although the two languages
are linguistically very dissimilar, our hypothesis is that there are similarities that are important
and interesting from a language
technology point of view. This may have implications when building tools to support producing and using health care documentation.
We perform a comparative qualitative analysis based on structure and content, as well as a comparative quantitative analysis on Finnish and Swedish Intensive Care Unit (ICU) nursing narratives. Our findings are that ICU nursing narratives in Finland and Sweden have many properties in common, but that many of these are challenging when it comes to developing language technology tools.
},
	booktitle    = {Proceedings of the NAACL HLT 2010 Second Louhi Workshop on Text and Data Mining of Health Documents},
	author       = {Allvin, H. and Carlsson, E. and Dalianis, H. and Danielsson-Ojala, R. and Daudaravicius, V. and Hassel, M. and Kokkinakis, Dimitrios and Lundgren-Laine, H. and Nilsson, G. and Nytrø, Ø. and Salanterä, S. and Skeppstedt, M. and Suominen, H. and Velupillai, S.},
	year         = {2010},
	pages        = {53--60},
}

@inproceedings{kokkinakis-2010-initiala-130210,
	title        = {Initiala resultat av en storskalig automatisk indexering av vetenskaplig litteratur med hela det svenska {SNOMED CT} - problem och möjligheter},
	abstract     = {Syftet med denna studie är dels att skapa en stor samling svenska medicinska elektroniska texter, en korpus, och dels att validera och kvalitetssäkra existerande termer ur SNOMED CT (the Systematized NOmenclature of MEDicine - Clinical Terms) gentemot korpusinnehållet. På det sättet kan man få en objektiv uppfattning om SNOMED CT:s validitet, täckning och reliabilitet. Man kan även berika terminologin med nya termer eller termvarianter genom att automatiskt extrahera termkandidater inom olika delfackområden från korpusen med hjälp av olika statistiska och lingvistiska metoder. Resultat av de korpusbaserade, empiriska studierna ska kunna användas av terminologer i deras arbete med att göra SNOMED CT mer täckande, pålitlig och enhetlig. Samtidigt, genom användning av autentisk data, kan man försäkra sig om att termvarianterna (existerande eller nya) är vedertagna termer hos fackmän. I fall flera etablerade termvarianter (nya termkandidater) förekommer i korpusen kan dessa införas efter manuell granskning som synonymer till rekommenderade termer (med stöd av ett lämpligt granskningsgränssnitt) och därmed vidare utveckla innehållet i SNOMED CT. Följaktligen kommer vår presentation att innehålla en redovisning som bygger på tre huvudpelare – korpusuppbyggnad – termvalidering – termextrahering. Korpusen samlades in från två källor efter erhållet tillstånd. Texternas ursprung i korpusen kommer dels från Läkartidningens (LT) digitala arkiv <http://ltarkiv.lakartidningen.se> och dels från DiabetologNytts (DN) digitala arkiv <http://diabetolognytt.se/aterkommande/arkivet.html>.},
	booktitle    = {2010-års nationella termkonferens: Professionen i språket - språket i professionen},
	author       = {Kokkinakis, Dimitrios},
	year         = {2010},
}

@book{wilhelmsson-2010-heuristisk-126092,
	title        = {Heuristisk analys med {Diderichsens} satsschema - Tillämpningar för svensk text},
	abstract     = {A heuristic method for parsing Swedish text, heuristic schema parsing, is described and implemented. Focusing on main clause (primary) analysis, a collection of licensing techniques for removing non-primary verb candidates is employed, leaving e.g. the primary verbs, particles and conjunctions (bounded key constituents) that delimit the content of the fields in Diderichsen’s sentence schema. Hereby, the subsequent identification of constituents which do not have an upper bound on their length (subject, object/predicatives and adverbials) can be identified relying to a lesser on extent explicit pattern matching, and more on different heuristic rules. For phrase type identification and delimitation of these constituents, when adjacent to each other, a novel chunking technique, rank-based chunking, is applied. Following this, a series of further rules merge chunks into larger ones, aiming at a final number of nominal chunks compatible with the valency information of the main verb. The aim is to identify full nominal and adverbial constituents, including post-modifiers. The implementation uses the Stockholm Umeå Corpus 2.0, a corpus which is balanced for different genres in published Swedish text. SUC’s tagset is also used unmodified in part-of-speech tagging which enables the program to deal with input text. The functional parsing, which includes no explicit language-defining grammar component is carried out technically using an object-based representation of clause structure.

Although output formats and types of evaluations of correctness are very different in parsers for Swedish text, it is claimed that the manual approach presented can provide high accuracy, which can be improved given more time for development.

The thesis work also includes two prototype applications, both requiring high accuracy of the sort of functional syntactic analysis described here. The first one is an implementation of automatic syntactic fronting in the area of text editing for Swedish, where the user is presented with a syntactically analyzed copy of her writing, from which paraphrases easily can be generated. The second application is in the field of natural language query systems and produces questions with answers from an arbitrary declarative input text. This prototype incorporates a text database from Swedish Wikipedia, and investigates primarily generation of WH-questions formed via fronting of unbounded primary constituents. The questions are generated as a text is opened and thus permits users to only ask the available ones, thus aiming at a high precision value.},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}

@book{wilhelmsson-2010-heuristisk-132135,
	title        = {Heuristisk analys med {Diderichsens} satsschema – Tillämpningar för svensk text},
	abstract     = {A heuristic method for parsing Swedish text, heuristic schema parsing, is described and implemented. Focusing on main clause (primary) analysis, a collection of licensing techniques for removing non-primary verb candidates is employed, leaving e.g. the primary verbs, particles and conjunctions (bounded key constituents) that delimit the content of the fields in Diderichsen’s sentence schema. Hereby, the subsequent identification of constituents which do not have an upper bound on their length (subject, object/predicatives and adverbials) can be identified relying to a lesser on extent explicit pattern matching, and more on different heuristic rules. For phrase type identification and delimitation of these constituents, when adjacent to each other, a novel chunking technique, rank-based chunking, is applied. Following this, a series of further rules merge chunks into larger ones, aiming at a final number of nominal chunks compatible with the valency information of the main verb. The aim is to identify full nominal and adverbial constituents, including post-modifiers. The implementation uses the Stockholm Umeå Corpus 2.0, a corpus which is balanced for different genres in published Swedish text. SUC’s tagset is also used unmodified in part-of-speech tagging which enables the program to deal with input text. The functional parsing, which includes no explicit language-defining grammar component is carried out technically using an object-based representation of clause structure.

The thesis work also includes two prototype applications, both requiring high accuracy of the sort of functional syntactic analysis described here. The first one is an implementation of automatic syntactic fronting in the area of text editing for Swedish, where the user is presented with a syntactically analyzed copy of her writing, from which paraphrases easily can be generated. The second application is in the field of natural language query systems and produces questions with answers from an arbitrary declarative input text. This prototype incorporates a text database from Swedish Wikipedia, and investigates primarily generation of WH-questions formed via fronting of unbounded primary constituents. The questions are generated as a text is opened and thus permits users to only ask the available ones, thus aiming at a high precision value.
},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	edition      = {Second},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
	isbn         = {978-91-977196-9-8},
}

@incollection{toporowskagronostaj-skoldberg-2010-swedish-121119,
	title        = {{Swedish} Medical Collocations: A Lexicographic Approach},
	booktitle    = {Korpora, Web und Datenbanken. Computergestützte Methoden in der modernen Phraseologie und Lexikographie},
	series       = {Phraseologie und Parömiologie},
	volume       = {25},
	author       = {Toporowska Gronostaj, Maria and Sköldberg, Emma},
	year         = {2010},
	publisher    = {Schneider Verlag Hohengehren GmbH},
	address      = {Baltmannsweiler, Germany},
	isbn         = {978-3-8340-0733-9},
	pages        = {181--195},
}

@inproceedings{borin-etal-2010-swedish-129126,
	title        = {{Swedish FrameNet++}},
	booktitle    = {Swedish Language Technology Conference 2010},
	author       = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
	year         = {2010},
}

@incollection{ljunglof-wiren-2010-syntactic-99884,
	title        = {Syntactic parsing},
	abstract     = {This chapter presents basic techniques for grammar-driven natural language parsing, that is, analysing a string of words (typically a sentence) to determine its structural description according to a formal grammar. Basic parsing concepts are explained after which a number of well-known parsing techniques are described.},
	booktitle    = {Handbook of Natural Language Processing},
	edition      = {Second},
	author       = {Ljunglöf, Peter and Wirén, Mats},
	year         = {2010},
	publisher    = {CRC Press, Taylor and Francis},
	isbn         = {978-1420085921},
}

@inproceedings{borin-forsberg-2010-from-118908,
	title        = {From the {People’s Synonym Dictionary} to fuzzy synsets - first steps},
	booktitle    = {Proceedings of the LREC 2010 workshop Semantic relations. Theory and Applications},
	author       = {Borin, Lars and Forsberg, Markus},
	year         = {2010},
	pages        = {18--25},
}

@inproceedings{borin-etal-2010-past-110368,
	title        = {The past meets the present in {Swedish FrameNet++}},
	abstract     = {The paper is about a recently initiated project which aims at the development of a Swedish FrameNet as an integral part of a larger lexical resource, hence the name “Swedish FrameNet++” (SweFN++). It focuses on reuse of free electronic resources and their role in the acquisition and population of Swedish frames. After a brief overview of Swedish resources, we reflect on three approaches to recycling the available lexical data in a semi-automatic manner. SweFN++ will be a multi-functional resource supporting research within lexicology and linguistics as well as different applications within computational lexicography and language technology, not to mention e-science.},
	booktitle    = {14th EURALEX International Congress},
	author       = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
	year         = {2010},
	pages        = {269--281},
}

@inproceedings{lindh-2010-preliminary-123920,
	title        = {Preliminary Formant Data of the {Swedia} Dialect Database in a Forensic Phonetic Perspective},
	booktitle    = {Proceedings of the 19th Annual Conference of the International Association for Forensic Phonetics and Acoustics, Trier, Germany},
	author       = {Lindh, Jonas},
	year         = {2010},
}

@inproceedings{borin-etal-2010-diabase-118907,
	title        = {{Diabase}: Towards a diachronic {BLARK} in support of historical studies},
	booktitle    = {Proceedings of LREC 2010},
	author       = {Borin, Lars and Forsberg, Markus and Kokkinakis, Dimitrios},
	year         = {2010},
}
Page manager: sb-webb