BibTeX

% Workshop paper. Abstract cleaned up: PDF ligature glyphs expanded to plain letters, hard line breaks reflowed, percent signs escaped for LaTeX.
@inproceedings{prasad-virk-2012-computational-170274,
	title        = {Computational evidence that {Hindi} and {Urdu} share a grammar but not the lexicon},
	abstract     = {Hindi and Urdu share a grammar and a basic vocabulary, but are often mutually unintelligible because they use different words in higher registers and sometimes even in quite ordinary situations. We report computational translation evidence of this unusual relationship (it differs from the usual pattern, that related languages share the advanced vocabulary and differ in the basics). We took a GF resource grammar for Urdu and adapted it mechanically for Hindi, changing essentially only the script (Urdu is written in Perso-Arabic, and Hindi in Devanagari) and the lexicon where needed. In evaluation, the Urdu grammar and its Hindi twin either both correctly translated an English sentence, or failed in exactly the same grammatical way, thus confirming computationally that Hindi and Urdu share a grammar. But the evaluation also found that the Hindi and Urdu lexicons differed in 18\% of the basic words, in 31\% of tourist phrases, and in 92\% of school mathematics terms.},
	booktitle    = {3rd Workshop on South and Southeast Asian Natural Language Processing ({SANLP}), collocated with {COLING} 2012},
	author       = {Prasad, K. V. S. and Virk, Shafqat},
	year         = {2012},
}
Sidansvarig: sb-webb