% Conference paper entry. The citation key is kept exactly as exported so that
% existing citations keep resolving.
% NOTE(review): the key contains a non-ASCII character; classic 8-bit BibTeX
% toolchains may not accept it -- confirm the target toolchain is UTF-8 aware.
@inproceedings{Wichmann-Søren2020-298431,
  author    = {Wichmann, Søren and Virk, Shafqat},
  title     = {Towards a data-driven network of linguistic terms},
  booktitle = {Swedish Language Technology Conference (SLTC)},
  year      = {2020},
  abstract  = {Starting from close to 20,000 text documents from the literature of language descriptions, from documents either born digitally or scanned and OCR’d, we extract keywords and pass them through a pruning pipeline where mainly keywords that can be considered as belonging to linguistic terminology survive. Subsequently we quantify relations among those terms using Normalized Pointwise Mutual Information (NPMI) and use the resulting measures, in conjunction with the Google PageRank (GPR), to build networks of linguistic terms. Two uses of the work are envisaged: (1) developing a search machine adapted to the large DReaM corpus of linguistic descriptive literature and (2) getting insights into how a data-driven ontology of linguistic terminology might be built.},
}