% Conference paper entry. The abstract was repaired for PDF-extraction
% artifacts only (soft hyphens left inside words, words fused at dropped
% line breaks); its wording is otherwise unchanged.
% NOTE(review): author name and abstract contain UTF-8 characters; this
% assumes a UTF-8-capable toolchain (e.g. biblatex + Biber) -- confirm.
@inproceedings{wichmann-virk-2020-towards-298431,
  author    = {Wichmann, Søren and Virk, Shafqat},
  title     = {Towards a data-driven network of linguistic terms},
  booktitle = {Swedish Language Technology Conference ({SLTC})},
  year      = {2020},
  abstract  = {Starting from close to 20,000 text documents from the literature of language descriptions, from documents either born digitally or scanned and OCR’d, we extract keywords and pass them through a pruning pipeline where mainly keywords that can be considered as belonging to linguistic terminology survive. Subsequently we quantify relations among those terms using Normalized Pointwise Mutual Information (NPMI) and use the resulting measures, in conjunction with the Google PageRank (GPR), to build networks of linguistic terms. Two uses of the work are envisaged: (1) developing a search machine adapted to the large DReaM corpus of linguistic descriptive literature and (2) getting insights into how a data-driven ontology of linguistic terminology might be built.},
}