Skip to main content


	title        = {Towards a data-driven network of linguistic terms},
	abstract     = {Starting from close to 20,000 text documents from the literature of language descriptions, from documents either born digitally or scanned and OCR’d, we extract keywords and pass them through a pruning pipeline where mainly keywords that can be considered as belonging to linguistic terminology survive. Subsequently we quantify relations among those terms using Normalized Pointwise Mutual Information (NPMI) and use the resulting measures, in conjunction with the Google PageRank (GPR), to build networks of linguistic terms. Two uses of the work are envisaged: (1) developing a search machine adapted to the large DReaM corpus of linguistic descriptive literature and (2) getting insights into how a data-driven ontology of linguistic terminology might be built.},
	booktitle    = {Swedish Language Technology Conference (SLTC)},
	author       = {Wichmann, Søren and Virk, Shafqat},
	year         = {2020},