Hoppa till huvudinnehåll
Språkbanken Text är en avdelning inom Språkbanken.

BibTeX

% Conference paper: revision of proposed part-of-speech tagging errors in SUC 2.0 (see abstract).
% The corpus name and the conference acronym are brace-protected so that
% sentence-casing bibliography styles keep their capitals.
% NOTE(review): address repeats the conference city already given in booktitle;
% confirm it is meant as the publisher location.
@inproceedings{forsbom-wilhelmsson-2010-revision-259876,
	title        = {Revision of Part-of-Speech Tagging in {Stockholm Umeå Corpus} 2.0},
	abstract     = {Many parsers use a part-of-speech tagger as a first step in parsing. The accuracy of the tagger naturally affects the performance of the parser. In this experiment, we revise 1500+ proposed errors in SUC 2.0 that were mainly found during work with schema parsing, and evaluate tagger instances trained on the revised corpus. The revisions turned out to be beneficial also for the taggers.},
	booktitle    = {Proceedings of the Third Swedish Language Technology Conference ({SLTC}), Linköping, Sverige},
	author       = {Forsbom, Eva and Wilhelmsson, Kenneth},
	year         = {2010},
	address      = {Linköping},
}

% Contribution to "Röster från Humanisten 2010" on automatically generating
% the questions that a Swedish text answers (see abstract).
% NOTE(review): same author, title and year as wilhelmsson-2010-automatisk-137859;
% possibly a duplicate record of the same work -- confirm before citing both.
@inproceedings{wilhelmsson-2010-automatisk-247440,
	author       = {Wilhelmsson, Kenneth},
	title        = {Automatisk generering av frågor som svensk text besvarar: ett informationssystem},
	booktitle    = {Röster från Humanisten 2010},
	year         = {2010},
	abstract     = {Vilken information kan en text sägas innehålla? Ett enkelt svar är ”de frågor som den besvarar.” I vilken grad går det i så fall att automatiskt generera dessa frågor och därmed programmera ett frågebesvarande informationssystem för svensk text?},
}

% Article on automatically generating the questions that a Swedish text answers (see abstract).
% The year was removed from the journal name (it is already carried by year and volume),
% and the two links that trailed the abstract were moved out of it: the persistent
% handle is in url, the anthology web page is kept in the ignored field internal-note.
@article{wilhelmsson-2010-automatisk-137859,
	title        = {Automatisk generering av frågor som svensk text besvarar: ett informationssystem},
	abstract     = {Vilken information kan en text sägas innehålla? Ett enkelt svar är ”de frågor som den besvarar.” I vilken grad går det i så fall att automatiskt generera dessa frågor och därmed programmera ett frågebesvarande informationssystem för svensk text?

Ett prototypsystem för denna uppgift har skapats som en del av ett avhandlingsprojekt inom språkteknologi. Det vore till exempel möjligt att vidareutveckla det system som här visas till en allmän teknisk tjänst, t.ex. webbaserad, som ger användare möjlighet att söka efter information med naturligt språk i en valfri digital text.

Denna text tar upp de allmänna förutsättningarna för automatisk generering av de frågor som en svensk text besvarar. Själva den teoretiska uppgiften har egenskaper som kan sägas vara lingvistiska eller informationsteoretiska. För att skapa det program som här beskrivs har dessutom naturligtvis en programmeringsinsats krävts, men denna kommer inte att tas upp här, den rent praktiska sidan av uppgiften är möjlig att lösa på många sätt.},
	journal      = {Röster från Humanisten},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	volume       = {2010},
	url          = {http://hdl.handle.net/2320/7176},
	internal-note = {Anthology web page given in the original record: http://www.hum.gu.se/samverkan/popularvetenskap/roster-fran-humanisten-2010/},
}

% Second edition ("2 uppl" in the original record) of the book on heuristic schema parsing
% of Swedish text. The edition now lives in the edition field instead of the title,
% the proper noun in the title is brace-protected against sentence-casing styles,
% and the dash is written as the LaTeX en dash (--).
% NOTE(review): edition is given as an integer (biblatex convention); classic BibTeX
% styles expect an ordinal word such as {Second} -- adjust to the toolchain in use.
% NOTE(review): the abstract describes thesis work; a thesis entry type may fit better -- confirm.
@book{wilhelmsson-2010-heuristisk-132135,
	title        = {Heuristisk analys med {Diderichsens} satsschema -- Tillämpningar för svensk text},
	abstract     = {A heuristic method for parsing Swedish text, heuristic schema parsing, is described and implemented. Focusing on main clause (primary) analysis, a collection of licensing techniques for removing non-primary verb candidates is employed, leaving e.g. the primary verbs, particles and conjunctions (bounded key constituents) that delimit the content of the fields in Diderichsen’s sentence schema. Hereby, the subsequent identification of constituents which do not have an upper bound on their length (subject, object/predicatives and adverbials) can be identified relying to a lesser on extent explicit pattern matching, and more on different heuristic rules. For phrase type identification and delimitation of these constituents, when adjacent to each other, a novel chunking technique, rank-based chunking, is applied. Following this, a series of further rules merge chunks into larger ones, aiming at a final number of nominal chunks compatible with the valency information of the main verb. The aim is to identify full nominal and adverbial constituents, including post-modifiers. The implementation uses the Stockholm Umeå Corpus 2.0, a corpus which is balanced for different genres in published Swedish text. SUC’s tagset is also used unmodified in part-of-speech tagging which enables the program to deal with input text. The functional parsing, which includes no explicit language-defining grammar component is carried out technically using an object-based representation of clause structure.

The thesis work also includes two prototype applications, both requiring high accuracy of the sort of functional syntactic analysis described here. The first one is an implementation of automatic syntactic fronting in the area of text editing for Swedish, where the user is presented with a syntactically analyzed copy of her writing, from which paraphrases easily can be generated. The second application is in the field of natural language query systems and produces questions with answers from an arbitrary declarative input text. This prototype incorporates a text database from Swedish Wikipedia, and investigates primarily generation of WH-questions formed via fronting of unbounded primary constituents. The questions are generated as a text is opened and thus permits users to only ask the available ones, thus aiming at a high precision value.
},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	edition      = {2},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
	ISBN         = {978-91-977196-9-8},
}

% Book on heuristic schema parsing of Swedish text using Diderichsen's sentence schema (see abstract).
% The title dash is written as the LaTeX en dash (--) instead of a single hyphen, and the
% proper noun in the title is brace-protected against sentence-casing styles.
% NOTE(review): the abstract describes thesis work; a thesis entry type may fit better -- confirm.
@book{wilhelmsson-2010-heuristisk-126092,
	title        = {Heuristisk analys med {Diderichsens} satsschema -- Tillämpningar för svensk text},
	abstract     = {A heuristic method for parsing Swedish text, heuristic schema parsing, is described and implemented. Focusing on main clause (primary) analysis, a collection of licensing techniques for removing non-primary verb candidates is employed, leaving e.g. the primary verbs, particles and conjunctions (bounded key constituents) that delimit the content of the fields in Diderichsen’s sentence schema. Hereby, the subsequent identification of constituents which do not have an upper bound on their length (subject, object/predicatives and adverbials) can be identified relying to a lesser on extent explicit pattern matching, and more on different heuristic rules. For phrase type identification and delimitation of these constituents, when adjacent to each other, a novel chunking technique, rank-based chunking, is applied. Following this, a series of further rules merge chunks into larger ones, aiming at a final number of nominal chunks compatible with the valency information of the main verb. The aim is to identify full nominal and adverbial constituents, including post-modifiers. The implementation uses the Stockholm Umeå Corpus 2.0, a corpus which is balanced for different genres in published Swedish text. SUC’s tagset is also used unmodified in part-of-speech tagging which enables the program to deal with input text. The functional parsing, which includes no explicit language-defining grammar component is carried out technically using an object-based representation of clause structure.

Although output formats and types of evaluations of correctness are very different in parsers for Swedish text, it is claimed that the manual approach presented can provide high accuracy, which can be improved given more time for development.

The thesis work also includes two prototype applications, both requiring high accuracy of the sort of functional syntactic analysis described here. The first one is an implementation of automatic syntactic fronting in the area of text editing for Swedish, where the user is presented with a syntactically analyzed copy of her writing, from which paraphrases easily can be generated. The second application is in the field of natural language query systems and produces questions with answers from an arbitrary declarative input text. This prototype incorporates a text database from Swedish Wikipedia, and investigates primarily generation of WH-questions formed via fronting of unbounded primary constituents. The questions are generated as a text is opened and thus permits users to only ask the available ones, thus aiming at a high precision value.},
	author       = {Wilhelmsson, Kenneth},
	year         = {2010},
	publisher    = {University of Gothenburg},
	address      = {Göteborg},
}