BibTeX

% Conference paper from the NODALIDA 2011 proceedings. The series name is kept
% in its own field; volume 11 is taken to be the number within that series
% (TODO confirm against the published proceedings).
@inproceedings{rama-borin-2011-estimating-140688,
	author       = {Rama, Taraka and Borin, Lars},
	title        = {Estimating Language Relationships from a Parallel Corpus. {A} Study of the {Europarl} Corpus},
	booktitle    = {{NODALIDA} 2011 Conference Proceedings},
	series       = {{NEALT} Proceedings Series},
	volume       = {11},
	pages        = {161--167},
	year         = {2011},
	abstract     = {Since the 1950s, linguists have been using short lists (40--200 items) of basic vocabulary
		as the central component in a methodology which is claimed to make it possible to automatically
		calculate genetic relationships among languages. In the last few years these methods have
		experienced something of a revival, in that more languages are involved, different distance
		measures are systematically compared and evaluated, and methods from computational biology are
		used for calculating language family trees. In this paper, we explore how this methodology can be
		extended in another direction, by using larger word lists automatically extracted from a parallel
		corpus using word alignment software. We present preliminary results from using the Europarl
		parallel corpus in this way for estimating the distances between some languages in the
		Indo-European language family.},
}
Sidansvarig: sb-webb