Skip to main content
Språkbanken Text is a department within Språkbanken.

BibTeX

% Workshop paper (UDW 2018). The page range is stored in the pages field, not in the
% booktitle; proper nouns and the acronym are brace-protected against style recasing.
@inProceedings{berdicevskis-etal-2018-using-286919,
	title        = {Using {Universal Dependencies} in cross-linguistic complexity research},
	abstract     = {We evaluate corpus-based measures of linguistic complexity obtained using Universal Dependencies (UD) treebanks. We propose a method of estimating robustness of the complexity values obtained using a given measure and a given treebank. The results indicate that measures of syntactic complexity might be on average less robust than those of morphological complexity. We also estimate the validity of complexity measures by comparing the results for very similar languages and checking for unexpected differences. We show that some of those differences that arise can be diminished by using parallel treebanks and, more importantly from the practical point of view, by harmonizing the language-specific solutions in the UD annotation.},
	booktitle    = {Proceedings of the Second Workshop on {Universal Dependencies} ({UDW} 2018)},
	author       = {Berdicevskis, Aleksandrs and Çöltekin, Çağrı and Ehret, Katharina and von Prince, Kilu and Ross, Daniel and Thompson, Bill and Yan, Chunxiao and Demberg, Vera and Lupyan, Gary and Rama, Taraka and Bentz, Christian},
	year         = {2018},
	publisher    = {Association for Computational Linguistics},
	pages        = {8--17},
}

% Conference paper. The title carries no terminal punctuation; the bibliography style adds it.
@inProceedings{rama-2015-automatic-218149,
	title        = {Automatic cognate identification with gap-weighted string subsequences},
	booktitle    = {Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, May 31 – June 5, 2015 Denver, Colorado, USA},
	author       = {Rama, Taraka},
	year         = {2015},
	ISBN         = {978-1-941643-49-5},
	pages        = {1227--1231},
}

% Chapter in an edited volume. The title carries no terminal punctuation; the style adds it.
@incollection{rama-borin-2015-comparative-197484,
	title        = {Comparative evaluation of string similarity measures for automatic language classification},
	booktitle    = {Sequences in Language and Text},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2015},
	publisher    = {De Gruyter Mouton},
	ISBN         = {978-3-11-036287-9},
}

% Conference paper (LREC 2014). The place name and acronym are brace-protected so that
% sentence-casing styles keep their capitals; the date range uses an en dash.
@inProceedings{borin-etal-2014-linguistic-198551,
	title        = {Linguistic landscaping of {South Asia} using digital language resources: Genetic vs. areal linguistics},
	booktitle    = {Proceedings of {LREC}, May 26--31, 2014, Reykjavik, Iceland},
	author       = {Borin, Lars and Saxena, Anju and Rama, Taraka and Comrie, Bernard},
	year         = {2014},
	ISBN         = {978-2-9517408-8-4},
	pages        = {3137--3144},
}

% Book by a single author, published by the University of Gothenburg in 2014.
@book{rama-2014-vocabulary-193330,
	author    = {Rama, Taraka},
	title     = {Vocabulary lists in computational historical linguistics},
	publisher = {University of Gothenburg},
	address   = {Göteborg},
	year      = {2014},
	isbn      = {978-91-87850-52-3},
}

% Journal article in PLoS ONE 8(5); the pages field holds the article identifier.
@article{rama-2013-phonotactic-175455,
	author   = {Rama, Taraka},
	title    = {Phonotactic diversity predicts the time depth of the world's language families},
	journal  = {PLoS ONE},
	volume   = {8},
	number   = {5},
	pages    = {e63238},
	year     = {2013},
	abstract = {The ASJP (Automated Similarity Judgment Program) described an automated, lexical similarity-based method for dating the world’s language groups using 52 archaeological, epigraphic and historical calibration date points. The present paper describes a new automated dating method, based on phonotactic diversity. Unlike ASJP, our method does not require any information on the internal classification of a language group. Also, the method can use all the available word lists for a language and its dialects eschewing the debate on ‘language’ vs. ‘dialect’. We further combine these dates and provide a new baseline which, to our knowledge, is the best one. We make a systematic comparison of our method, ASJP’s dating procedure, and combined dates. We predict time depths for world’s language families and sub-families using this new baseline. Finally, we explain our results in the model of language change given by Nettle.},
}

% Conference paper (QITL-5). Authors are in "Surname, Given" form; the second author's
% surname is Kolachina (NOTE(review): source had it inverted as "Prasanth, Kolachina";
% confirm against the publication). The title carries no terminal punctuation.
@inProceedings{rama-etal-2013-methods-187122,
	title        = {Two methods for automatic cognate identification},
	booktitle    = {Proceedings of the 5th Conference on Quantitative Investigations in Theoretical Linguistics QITL-5. University of Leuven, 12--14 September 2013},
	author       = {Rama, Taraka and Kolachina, Prasanth and Kolachina, Sudheer},
	year         = {2013},
	number       = {5},
	pages        = {76--80},
}

% Journal article, Journal of Quantitative Linguistics 21(1).
@article{rama-borin-2014-gram-187121,
	author  = {Rama, Taraka and Borin, Lars},
	title   = {N-Gram Approaches to the Historical Dynamics of Basic Vocabulary},
	journal = {Journal of Quantitative Linguistics},
	volume  = {21},
	number  = {1},
	pages   = {50--64},
	year    = {2014},
}

% Chapter in an edited volume. Editors are in "Surname, Given" form with no trailing
% punctuation, so the last surname parses as "Saxena"; the language-family name in the
% title is brace-protected against style recasing.
@incollection{rama-kolachina-2013-distance-165135,
	title        = {Distance-based Phylogenetic Inference Algorithms in the Subgrouping of {Dravidian} Languages},
	booktitle    = {Approaches to Measuring Linguistic Differences},
	editor       = {Borin, Lars and Saxena, Anju},
	author       = {Rama, Taraka and Kolachina, Sudheer},
	year         = {2013},
	publisher    = {De Gruyter Mouton},
	address      = {Berlin},
	ISBN         = {978-3-11-030525-8},
}

% Conference paper (COLING 2012). Authors are in "Surname, Given" form; the second
% author's surname is Kolachina (NOTE(review): source had it inverted as
% "Prasanth, Kolachina"; confirm against the publication). The citation key is kept
% unchanged. The abstract is stored as one unwrapped paragraph.
@inProceedings{rama-prasanth-2012-good-165075,
	title        = {How good are typological distances for determining genealogical relationships among languages?},
	abstract     = {The recent availability of typological databases such as World Atlas of Language Structures (WALS) has spurred investigations regarding its utility for classifying the world’s languages, the stability of typological features in genetic linguistics and typological universals across the language families of the world. In this paper, we compare typological distances, derived from fifteen vector similarity measures, with family internal classifications and also lexical divergence. These results are only a first step towards the use of WALS database in the projection of NLP resources and bootstrapping NLP tools for typologically or genetically similar, yet resource-poor languages.},
	booktitle    = {Proceedings of the 24th International Conference on Computational Linguistics},
	author       = {Rama, Taraka and Kolachina, Prasanth},
	year         = {2012},
}

% Conference paper. The language-family name in the title is brace-protected so that
% sentence-casing styles do not lower-case it.
@inProceedings{kolachina-etal-2011-maximum-147889,
	title        = {Maximum parsimony method in the subgrouping of {Dravidian} languages},
	booktitle    = {Quantitative Investigations in Theoretical Linguistics},
	author       = {Kolachina, Sudheer and Rama, Taraka and Bai, Lakshmi},
	year         = {2011},
	volume       = {4},
	pages        = {52--56},
}

% Journal article, Linguistic Typology 15(2). The acronym in the title is brace-protected
% so that sentence-casing styles do not lower-case it.
@article{wichmann-etal-2011-phonological-147887,
	title        = {Phonological diversity, word length, and population sizes across languages: The {ASJP} evidence},
	abstract     = {Previous literature has reported a positive correlation between phoneme inventory sizes and population sizes for languages, indicating that larger languages tend to make more phonological distinctions, and claims have also been made that average word length and phoneme inventory sizes are negatively correlated. Yet another relevant variable is geography, since the spatial propinquity of languages influences the similarity of their overall typological profile; moreover, specific historical events affecting language distributions, such as migrations or the development of certain cultural advantages, are usually also anchored geographically. In this paper we replicate previous findings on a substantially larger set of data drawn from comparative wordlists in the database of the Automated Similarity Judgment Program (ASJP), and discuss the relationships among the three variables mentioned in the title of the paper as well the influence of geography, including the idea that phonemic diversity across the world's languages provides evidence for an out-of-Africa model of the expansion of languages.},
	journal      = {Linguistic Typology},
	author       = {Wichmann, Søren and Rama, Taraka and Holman, Eric},
	year         = {2011},
	volume       = {15},
	number       = {2},
	pages        = {177--197},
}

% Journal article, Language Dynamics and Change 1(2). The journal name carries no
% trailing punctuation; the style supplies separators.
@article{wichmann-etal-2011-correlates-158467,
	title        = {Correlates of reticulation in linguistic phylogenies},
	journal      = {Language Dynamics and Change},
	author       = {Wichmann, Søren and Holman, Eric and Rama, Taraka and Walker, Robert S.},
	year         = {2011},
	volume       = {1},
	number       = {2},
	pages        = {205--240},
}

% Paper in workshop preproceedings. The booktitle value has no trailing whitespace.
@inProceedings{rama-2012-gram-159106,
	title        = {N-gram approaches to the historical dynamics of basic vocabulary},
	booktitle    = {Preproceedings of Computational approaches to the study of dialectal and typological variation},
	author       = {Rama, Taraka},
	year         = {2012},
}

% Conference paper (SLTC 2012). "N-gram" is written without the stray space that text
% extraction left after the N, in both title and abstract; the title term is
% brace-protected so the capital N survives sentence-casing styles.
@inProceedings{rama-borin-2012-properties-164449,
	title        = {Properties of phoneme {N-grams} across the world’s language families},
	abstract     = {In this article, we investigate the properties of phoneme N-grams across half of the world’s languages. The sizes of three different N-gram distributions of the world’s language families obey a power law. Further, the N-gram distributions of language families parallel the sizes of the families, which also follow a power law distribution. The correlation between N-gram distributions and language family sizes improves with increasing values of N. The study also raises some new questions about the use of N-gram distributions in linguistic research, which we hope to be able to investigate in the future.},
	booktitle    = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2012},
}

% Conference paper (NODALIDA 2011). The corpus name in the title is brace-protected
% against style recasing; the abstract is stored as one unwrapped paragraph.
@inProceedings{rama-borin-2011-estimating-140688,
	title        = {Estimating Language Relationships from a Parallel Corpus. A Study of the {Europarl} Corpus},
	abstract     = {Since the 1950s, linguists have been using short lists (40–200 items) of basic vocabulary as the central component in a methodology which is claimed to make it possible to automatically calculate genetic relationships among languages. In the last few years these methods have experienced something of a revival, in that more languages are involved, different distance measures are systematically compared and evaluated, and methods from computational biology are used for calculating language family trees. In this paper, we explore how this methodology can be extended in another direction, by using larger word lists automatically extracted from a parallel corpus using word alignment software. We present preliminary results from using the Europarl parallel corpus in this way for estimating the distances between some languages in the Indo-European language family.},
	booktitle    = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
	author       = {Rama, Taraka and Borin, Lars},
	year         = {2011},
	volume       = {11},
	pages        = {161--167},
}