% Conference papers from 2018 (three entries, two of which describe the same paper).
% Text outside entries is ignored by BibTeX, so these percent-prefixed lines are comments.

% LingFN workshop paper, first record.
% Duplicate of key Malm-Per2018-269086 (identical title, authors, abstract and ISBN);
% both keys are kept so that existing citations of either key keep resolving.
% The editors used to be embedded in booktitle ("/ Edited by ..."); they are now in the editor field.
% NOTE(review): Miyazaki is the workshop venue named in booktitle -- confirm whether the
% publisher's city was intended for the address field.
@inproceedings{Malm-Per2018-267404,
  author    = {Malm, Per and Virk, Shafqat and Borin, Lars and Saxena, Anju},
  title     = {{LingFN}: Towards a framenet for the linguistics domain},
  booktitle = {Proceedings : LREC 2018 Workshop, International FrameNet Workshop 2018. Multilingual Framenets and Constructicons, May 12, 2018, Miyazaki, Japan},
  editor    = {Torrent, Tiago Timponi and Borin, Lars and Baker, Collin F.},
  year      = {2018},
  publisher = {ELRA},
  address   = {Miyazaki},
  isbn      = {979-10-95546-04-7},
  abstract  = {Framenets and frame semantics have proved useful for a number of natural language processing (NLP) tasks. However, in this connection framenets have often been criticized for limited coverage. A proposed reasonable-effort solution to this problem is to develop domain-specific (sublanguage) framenets to complement the corresponding general-language framenets for particular NLP tasks, and in the literature we find such initiatives covering, e.g., medicine, soccer, and tourism. In this paper, we report on our experiments and first results on building a framenet to cover the terms and concepts encountered in descriptive linguistic grammars. A contextual statistics based approach is used to judge the polysemous nature of domain-specific terms, and to design new domain-specific frames. The work is part of a more extensive research undertaking where we are developing NLP methodologies for automatic extraction of linguistic information from traditional linguistic descriptions to build typological databases, which otherwise are populated using a labor intensive manual process.},
}

% CLARIN 2017 selected-papers contribution on reusing Swe-Clarin infrastructure for the
% Linguistic Survey of India material (see abstract).
@inproceedings{Borin-Lars2018-267534,
  author    = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
  title     = {Many a little makes a mickle - infrastructure component reuse for a massively multilingual linguistic study},
  booktitle = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18–20 September 2017},
  year      = {2018},
  publisher = {Linköping University Electronic Press},
  address   = {Linköping},
  isbn      = {978-91-7685-273-6},
  abstract  = {We present ongoing work aiming at turning the linguistic material available in Grierson’s classical Linguistic Survey of India (LSI) into a digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia and studies relating to language typology and contact linguistics. The project has two concrete main aims: (1) to conduct a linguistic investigation of the claim that South Asia constitutes a linguistic area; (2) to develop state-of-the-art language technology for automatically extracting the relevant information from the text of the LSI. In this presentation we focus on how, in the first part of the project, a number of existing research infrastructure components provided by Swe-Clarin, the Swedish CLARIN consortium, have been ‘recycled’ in order to allow the linguists involved in the project to quickly orient themselves in the vast LSI material, and to be able to provide input to the language technologists designing the tools for information extraction from the descriptive grammars.},
}

% LingFN workshop paper, second record.
% Duplicate of key Malm-Per2018-267404 (identical title, authors, abstract and ISBN; only the
% booktitle wording differs); kept so that existing citations of this key keep resolving.
% NOTE(review): confirm whether Miyazaki (address) is the publisher's city or the workshop venue.
@inproceedings{Malm-Per2018-269086,
  author    = {Malm, Per and Virk, Shafqat and Borin, Lars and Saxena, Anju},
  title     = {{LingFN}: Towards a framenet for the linguistics domain},
  booktitle = {Proceedings of the LREC 2018 workshop: International FrameNet Workshop 2018 – Multilingual Framenets and Constructicons},
  year      = {2018},
  publisher = {ELRA},
  address   = {Miyazaki},
  isbn      = {979-10-95546-04-7},
  abstract  = {Framenets and frame semantics have proved useful for a number of natural language processing (NLP) tasks. However, in this connection framenets have often been criticized for limited coverage. A proposed reasonable-effort solution to this problem is to develop domain-specific (sublanguage) framenets to complement the corresponding general-language framenets for particular NLP tasks, and in the literature we find such initiatives covering, e.g., medicine, soccer, and tourism. In this paper, we report on our experiments and first results on building a framenet to cover the terms and concepts encountered in descriptive linguistic grammars. A contextual statistics based approach is used to judge the polysemous nature of domain-specific terms, and to design new domain-specific frames. The work is part of a more extensive research undertaking where we are developing NLP methodologies for automatic extraction of linguistic information from traditional linguistic descriptions to build typological databases, which otherwise are populated using a labor intensive manual process.},
}