% Bibliography entries for 2012 publications on Grammatical Framework (GF)
% resources and applications. The file is UTF-8 encoded; accented letters and
% typographic punctuation are stored directly rather than as LaTeX escapes.
% Citation keys are externally assigned identifiers and must not be renamed,
% since documents cite them verbatim.

% Overview paper on the MOLTO translation project (see abstract).
% The project acronym is braced in the title so that sentence-casing styles
% do not lowercase it.
@inproceedings{Caprotti-Olga2012-178183,
  author    = {Caprotti, Olga and Ranta, Aarne and Angelov, Krasimir and Enache, Ramona and Camilleri, John J. and Dannélls, Dana and Détrez, Grégoire and Hallgren, Thomas and Prasad, K. V. S. and Virk, Shafqat},
  title     = {High-quality translation: {MOLTO} tools and applications},
  booktitle = {The fourth Swedish Language Technology Conference ({SLTC})},
  year      = {2012},
  abstract  = {MOLTO (Multilingual On Line Translation, FP7-ICT-247914, www.molto-project.eu) is a European project focusing on translation on the web. MOLTO targets translation that has production quality, that is, usable for quick and reliable dissemination of information. MOLTO’s main focus is to increase the productivity of such translation systems, building on the technology of GF (Grammatical Framework) and its Resource Grammar Library. But MOLTO also develops hybrid methods which increase the quality of Statistical Machine Translation (SMT) by adding linguistic information, or bootstrap grammatical models from statistical models. This paper gives a brief overview of MOLTO’s latest achievements, many of which are more thoroughly described in separate papers and available as web-based demos and as open-source software.},
}

% Workshop paper comparing Hindi and Urdu GF resource grammars (see abstract).
% Percent signs in the abstract are escaped: an unescaped percent sign starts
% a TeX comment if a style ever prints this field.
@inproceedings{Prasad-K.V.S.2012-170274,
  author    = {Prasad, K. V. S. and Virk, Shafqat},
  title     = {Computational evidence that {Hindi} and {Urdu} share a grammar but not the lexicon},
  booktitle = {3rd Workshop on South and Southeast Asian Natural Language Processing ({SANLP}), collocated with {COLING} 12},
  year      = {2012},
  abstract  = {Hindi and Urdu share a grammar and a basic vocabulary, but are often mutually unintelligible because they use different words in higher registers and sometimes even in quite ordinary situations. We report computational translation evidence of this unusual relationship (it differs from the usual pattern, that related languages share the advanced vocabulary and differ in the basics). We took a GF resource grammar for Urdu and adapted it mechanically for Hindi, changing essentially only the script (Urdu is written in Perso-Arabic, and Hindi in Devanagari) and the lexicon where needed. In evaluation, the Urdu grammar and its Hindi twin either both correctly translated an English sentence, or failed in exactly the same grammatical way, thus confirming computationally that Hindi and Urdu share a grammar. But the evaluation also found that the Hindi and Urdu lexicons differed in 18\% of the basic words, in 31\% of tourist phrases, and in 92\% of school mathematics terms.},
}

% Monograph published by the University of Gothenburg.
% NOTE(review): the publisher is a university, so this may be a thesis rather
% than a book; confirm and change the entry type if so.
@book{Virk-Shafqat2012-170273,
  author    = {Virk, Shafqat},
  title     = {Computational Grammar Resources for {Indo-Iranian} Languages},
  publisher = {University of Gothenburg},
  address   = {Göteborg},
  year      = {2012},
}

% Conference paper describing a GF resource grammar for Persian (see abstract).
@inproceedings{Virk-Shafqat2012-170271,
  author    = {Virk, Shafqat and Abolahrar, Elnaz},
  title     = {An Open Source {Persian} Computational Grammar},
  booktitle = {Proceedings of the Eighth International Conference on Language Resources and Evaluation ({LREC'12})},
  year      = {2012},
  abstract  = {In this paper, we describe a multilingual open-source computational grammar of Persian, developed in Grammatical Framework (GF) – A type-theoretical grammar formalism. We discuss in detail the structure of different syntactic (i.e. noun phrases, verb phrases, adjectival phrases, etc.) categories of Persian. First, we show how to structure and construct these categories individually. Then we describe how they are glued together to make well-formed sentences in Persian, while maintaining the grammatical features such as agreement, word order, etc. We also show how some of the distinctive features of Persian, such as the ezafe construction, are implemented in GF. In order to evaluate the grammar’s correctness, and to demonstrate its usefulness, we have added support for Persian in a multilingual application grammar (the Tourist Phrasebook) using the reported resource grammar.},
}