% Bibliography entries, 2019. Each work appears once; the verbatim repeats
% that reused the same citation keys were removed because BibTeX/Biber reject
% repeated keys. Citation keys are unchanged so existing citations still resolve.

@inproceedings{Alfter-David2019-281344,
  author    = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena},
  title     = {{Lärka}: From Language Learning Platform to Infrastructure for Research on Language Learning},
  booktitle = {Linköping Electronic Conference Proceedings},
  year      = {2019},
  publisher = {Linköping University Press},
  address   = {Linköping},
  isbn      = {978-91-7685-034-3},
  abstract  = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus-based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.},
}

@inproceedings{Rouces-Jacobo2019-281308,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  title     = {Tracking Attitudes Towards Immigration in {Swedish} Media},
  booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference},
  year      = {2019},
  publisher = {CEUR Workshop Proceedings},
  abstract  = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.},
}

@inproceedings{Rouces-Jacobo2019-281307,
  author    = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina},
  title     = {Political Stance Analysis Using {Swedish} Parliamentary Data},
  booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference},
  year      = {2019},
  publisher = {CEUR Workshop Proceedings},
  abstract  = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.},
}

% NOTE(review): the techreport type requires an institution field, which the
% source data does not provide - confirm the issuing institution and add it.
@techreport{Ljunglöf-Peter2019-281222,
  author   = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars},
  title    = {Assessing the quality of {Språkbanken’s} annotations},
  year     = {2019},
  abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus search tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models.},
}

% NOTE(review): the entry below continues past the end of this section and is
% kept verbatim; it repeats the key Rouces-Jacobo2019-281307 and should be
% removed together with its continuation.
@inProceedings{Rouces-Jacobo2019-281307, title = {Political Stance Analysis Using Swedish Parliamentary Data}, abstract = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). 
Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
}, author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars}, year = {2019}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
}, author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars}, year = {2019}, } @inProceedings{Alfter-David2019-281344, title = {Lärka: From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpusbased exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. 
Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.}, booktitle = {Linköping Electronic Conference Proceedings}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2019}, publisher = {Linköping University Press}, adress = {Linköping}, ISBN = {978-91-7685-034-3}, } @inProceedings{Rouces-Jacobo2019-281308, title = {Tracking Attitudes Towards Immigration in Swedish Media}, abstract = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @inProceedings{Rouces-Jacobo2019-281307, title = {Political Stance Analysis Using Swedish Parliamentary Data}, abstract = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). 
Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
}, author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars}, year = {2019}, } @inProceedings{Alfter-David2019-281344, title = {Lärka: From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpusbased exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. 
Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.}, booktitle = {Linköping Electronic Conference Proceedings}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2019}, publisher = {Linköping University Press}, adress = {Linköping}, ISBN = {978-91-7685-034-3}, } @inProceedings{Rouces-Jacobo2019-281308, title = {Tracking Attitudes Towards Immigration in Swedish Media}, abstract = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @inProceedings{Rouces-Jacobo2019-281307, title = {Political Stance Analysis Using Swedish Parliamentary Data}, abstract = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). 
Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
}, author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars}, year = {2019}, } @inProceedings{Alfter-David2019-281344, title = {Lärka: From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpusbased exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. 
Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.}, booktitle = {Linköping Electronic Conference Proceedings}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2019}, publisher = {Linköping University Press}, adress = {Linköping}, ISBN = {978-91-7685-034-3}, } @inProceedings{Rouces-Jacobo2019-281308, title = {Tracking Attitudes Towards Immigration in Swedish Media}, abstract = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @inProceedings{Rouces-Jacobo2019-281307, title = {Political Stance Analysis Using Swedish Parliamentary Data}, abstract = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). 
Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
}, author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars}, year = {2019}, } @inProceedings{Alfter-David2019-281344, title = {Lärka: From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpusbased exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. 
Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.}, booktitle = {Linköping Electronic Conference Proceedings}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2019}, publisher = {Linköping University Press}, adress = {Linköping}, ISBN = {978-91-7685-034-3}, } @inProceedings{Rouces-Jacobo2019-281308, title = {Tracking Attitudes Towards Immigration in Swedish Media}, abstract = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @inProceedings{Rouces-Jacobo2019-281307, title = {Political Stance Analysis Using Swedish Parliamentary Data}, abstract = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). 
Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
}, author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars}, year = {2019}, } @inProceedings{Alfter-David2019-281344, title = {Lärka: From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpusbased exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN. Lärka has recently received a new responsive user interface adapted to different devices with different screen sizes. Moreover, the system has also been augmented with new functionalities. These recent additions aim at improving the usability and the usefulness of the platform for pedagogical purposes. The most important development, though, is the adaptation of the platform to serve as a component in an e-infrastructure supporting research on language learning and multilingualism. 
Thanks to Lärka’s service-oriented architecture, most functionalities are also available as web services which can be easily re-used by other applications.}, booktitle = {Linköping Electronic Conference Proceedings}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2019}, publisher = {Linköping University Press}, adress = {Linköping}, ISBN = {978-91-7685-034-3}, } @inProceedings{Rouces-Jacobo2019-281308, title = {Tracking Attitudes Towards Immigration in Swedish Media}, abstract = {We use a gold standard under construction for sentiment analysis in Swedish to explore how attitudes towards immigration change across time and media. We track the evolution of attitude starting from the year 2000 for three different Swedish media: the national newspapers Aftonbladet and Svenska Dagbladet, representing different halves of the left–right political spectrum, and the online forum Flashback.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @inProceedings{Rouces-Jacobo2019-281307, title = {Political Stance Analysis Using Swedish Parliamentary Data}, abstract = {We process and visualize Swedish parliamentary data using methods from statistics and machine learning, which allows us to obtain insight into the political processes behind the data. We produce plots that let us infer the relative stance of political parties and their members on different topics. In addition, we can infer the degree of homogeneity of individual votes within different parties, as well as the degree of multi-dimensionality of Swedish politics.}, booktitle = {CEUR Workshop Proceedings (Vol. 2364). 
Digital Humanities in the Nordic Countries 4th Conference }, author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina}, year = {2019}, publisher = {CEUR Workshop Proceedings}, } @techreport{Ljunglöf-Peter2019-281222, title = {Assessing the quality of Språkbanken’s annotations}, abstract = {Most of the corpora in Språkbanken Text consist of unannotated plain text, such as almost all newspaper texts, social media texts, novels and official documents. We also have some corpora that are manually annotated in different ways, such as Talbanken (annotated for part-of-speech and syntactic structure), and the Stockholm Umeå Corpus (annotated for part-of-speech). Språkbanken’s annotation pipeline Sparv aims to automatise the work of automatically annotating all our corpora, while still keeping the manual annotations intact. When all corpora are annotated, they can be made available, e.g., in the corpus searh tools Korp and Strix. Until now there has not been any comprehensive overview of the annotation tools and models that Sparv has been using for the last eight years. Some of them have not been updated since the start, such as the part-of-speech tagger Hunpos and the dependency parser MaltParser. There are also annotation tools that we still have not included, such as a constituency-based parser. Therefore Språkbanken initiated a project with the aim of conducting such an overview. This document is the outcome of that project, and it contains descriptions of the types of manual and automatic annotations that we currently have in Språkbanken, as well as an incomplete overview of the state-of-the-art with regards to annotation tools and models. 
},
  author = {Ljunglöf, Peter and Zechner, Niklas and Nieto Piña, Luis and Adesam, Yvonne and Borin, Lars},
  year = {2019},
}

@comment{Four byte-identical repeats of the entries Alfter-David2019-281344, Rouces-Jacobo2019-281308, Rouces-Jacobo2019-281307 and Ljunglöf-Peter2019-281222 were removed here. The same keys are already defined at the top of this file, and repeated keys make BibTeX report a Repeated entry error.}

@inProceedings{Rouces-Jacobo2018-264721,
  title = {Defining a gold standard for a Swedish sentiment lexicon: Towards higher-yield text mining in the digital humanities},
  abstract = {There is an increasing demand for multilingual sentiment analysis, and most work on sentiment lexicons is still carried out based on English lexicons like WordNet. In addition, many of the non-English sentiment lexicons that do exist have been compiled by (machine) translation from English resources, thereby arguably obscuring possible language-specific characteristics of sentiment-loaded vocabulary. In this paper we describe the creation from scratch of a gold standard for the sentiment annotation of Swedish terms as a first step towards the creation of a full-fledged sentiment lexicon for Swedish.},
  booktitle = {CEUR Workshop Proceedings vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä Mikko Tolonen Jouni Tuominen },
  author = {Rouces, Jacobo and Borin, Lars and Tahmasebi, Nina and Eide, Stian Rødven},
  year = {2018},
  publisher = {University of Helsinki, Faculty of Arts},
  address = {Helsinki},
}

@inProceedings{Rouces-Jacobo2018-264720,
  title = {SenSALDO: Creating a Sentiment Lexicon for Swedish},
  abstract = {The natural language processing subfield known as sentiment analysis or opinion mining has seen an explosive expansion over the last decade or so, and sentiment analysis has become a standard item in the NLP toolbox. Still, many theoretical and methodological questions remain unanswered and resource gaps unfilled. Most work on automated sentiment analysis has been done on English and a few other languages; for most written languages of the world, this tool is not available. 
This paper describes the development of an extensive sentiment lexicon for written (standard) Swedish. We investigate different methods for developing a sentiment lexicon for Swedish. We use an existing gold standard dataset for training and testing. For each word sense from the SALDO Swedish lexicon, we assign a real value sentiment score in the range [-1,1] and produce a sentiment label. We implement and evaluate three methods: a graph-based method that iterates over the SALDO structure, a method based on random paths over the SALDO structure and a corpus-driven method based on word embeddings. The resulting sense-disambiguated sentiment lexicon (SenSALDO) is an open source resource and freely available from Språkbanken, The Swedish Language Bank at the University of Gothenburg.},
  booktitle = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, 7-12 May 2018, Miyazaki (Japan)},
  author = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Eide, Stian Rødven},
  year = {2018},
  publisher = {ELRA},
  address = {Miyazaki},
  ISBN = {979-10-95546-00-9},
}

@inProceedings{Rouces-Jacobo2018-264719,
  title = {Generating a Gold Standard for a Swedish Sentiment Lexicon},
  abstract = {We create a gold standard for sentiment annotation of Swedish terms, using the freely available SALDO lexicon and the Gigaword corpus. For this purpose, we employ a multi-stage approach combining corpus-based frequency sampling, direct score annotation and Best-Worst Scaling. 
In addition to obtaining a gold standard, we analyze the data from our process and we draw conclusions about the optimal sentiment model.},
  booktitle = {LREC 2018, Eleventh International Conference on Language Resources and Evaluation, May 7-12, 2018, Miyazaki (Japan)},
  author = {Rouces, Jacobo and Tahmasebi, Nina and Borin, Lars and Eide, Stian Rødven},
  year = {2018},
  publisher = {ELRA},
  address = {Miyazaki},
  ISBN = {979-10-95546-00-9},
}

@inProceedings{Karsvall-Olof2018-265603,
  title = {SDHK meets NER: Linking place names with medieval charters and historical maps},
  booktitle = {CEUR Workshop Proceedings, vol. 2084. Proceedings of the Digital Humanities in the Nordic Countries 3rd Conference Helsinki, Finland, March 7-9, 2018. Edited by Eetu Mäkelä Mikko Tolonen Jouni Tuominen },
  author = {Karsvall, Olof and Borin, Lars},
  year = {2018},
  publisher = {University of Helsinki, Faculty of Arts},
  address = {Helsinki},
}

@misc{Torrent-TiagoTimponi2018-267405,
  title = {Proceedings of the LREC 2018 Workshop International FrameNet Workshop 2018: Multilingual Framenets and Constructicons. 12 May 2018 – Miyazaki, Japan},
  abstract = {The International FrameNet Workshop 2018 brought together researchers in Frame Semantics and Construction Grammar, two areas which have traditionally been interrelated, but which have been developing somewhat independently in recent years. It is also addressed at language technology researchers working with language resources based on Frame Semantics or Construction Grammar. 
The workshop follows on from similar joint meetings in Berkeley, California in 2013 (IFNW 2013, sponsored by the Swedish FrameNet group) and in Juiz de Fora, Brazil in 2016 (IFNW 2016, sponsored by FrameNet Brasil), and will cover the rapidly unfolding developments in both areas and recent research on their interconnections.},
  author = {Torrent, Tiago Timponi and Borin, Lars and Baker, Collin},
  year = {2018},
  publisher = {ELRA},
  address = {Miyazaki},
  ISBN = {979-10-95546-04-7},
}

@inProceedings{Malm-Per2018-267404,
  title = {LingFN: Towards a framenet for the linguistics domain},
  abstract = {Framenets and frame semantics have proved useful for a number of natural language processing (NLP) tasks. However, in this connection framenets have often been criticized for limited coverage. A proposed reasonable-effort solution to this problem is to develop domain-specific (sublanguage) framenets to complement the corresponding general-language framenets for particular NLP tasks, and in the literature we find such initiatives covering, e.g., medicine, soccer, and tourism. In this paper, we report on our experiments and first results on building a framenet to cover the terms and concepts encountered in descriptive linguistic grammars. A contextual statistics based approach is used to judge the polysemous nature of domain-specific terms, and to design new domain-specific frames. The work is part of a more extensive research undertaking where we are developing NLP methodologies for automatic extraction of linguistic information from traditional linguistic descriptions to build typological databases, which otherwise are populated using a labor intensive manual process.},
  booktitle = {Proceedings : LREC 2018 Workshop, International FrameNet Workshop 2018. Multilingual Framenets and Constructicons, May 12, 2018, Miyazaki, Japan / Edited by Tiago Timponi Torrent, Lars Borin and Collin F. 
Baker},
  author = {Malm, Per and Virk, Shafqat and Borin, Lars and Saxena, Anju},
  year = {2018},
  publisher = {ELRA},
  address = {Miyazaki},
  ISBN = {979-10-95546-04-7},
}

@incollection{Haugen-Einar2018-267403,
  title = {Danish, Norwegian and Swedish},
  booktitle = {The world's major languages / edited by Bernard Comrie },
  author = {Haugen, Einar and Borin, Lars},
  year = {2018},
  publisher = {Routledge},
  address = {London and New York},
  ISBN = {9781138184824},
  pages = {127--150},
}

@inProceedings{Borin-Lars2018-267534,
  title = {Many a little makes a mickle - infrastructure component reuse for a massively multilingual linguistic study},
  abstract = {We present ongoing work aiming at turning the linguistic material available in Grierson’s classical Linguistic Survey of India (LSI) into a digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia and studies relating to language typology and contact linguistics. The project has two concrete main aims: (1) to conduct a linguistic investigation of the claim that South Asia constitutes a linguistic area; (2) to develop state-of-the-art language technology for automatically extracting the relevant information from the text of the LSI. 
In this presentation we focus on how, in the first part of the project, a number of existing research infrastructure components provided by Swe-Clarin, the Swedish CLARIN consortium, have been ‘recycled’ in order to allow the linguists involved in the project to quickly orient themselves in the vast LSI material, and to be able to provide input to the language technologists designing the tools for information extraction from the descriptive grammars.},
  booktitle = {Selected papers from the CLARIN Annual Conference 2017, Budapest, 18–20 September 2017},
  author = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
  year = {2018},
  publisher = {Linköping University Electronic Press},
  address = {Linköping},
  ISBN = {978-91-7685-273-6},
}

@incollection{Borin-Lars2018-269047,
  title = {Language technology and 3rd wave HCI: Towards phatic communication and situated interaction},
  abstract = {In the field of language technology, researchers are starting to pay more attention to various interactional aspects of language – a development prompted by a confluence of factors, and one which applies equally to the processing of written and spoken language. Notably, the so-called ‘phatic’ aspects of linguistic communication are coming into focus in this work, where linguistic interaction is increasingly recognized as being fundamentally situated. This development resonates well with the concerns of third wave HCI, which involves a shift in focus from stating the requirements on HCI design primarily in terms of “context-free” information flow, to a view where it is recognized that HCI – just like interaction among humans – is indissolubly embedded in complex, shifting contexts. 
These – together with the different backgrounds and intentions of interaction participants – shape the interaction in ways which are not readily understandable in terms of rational information exchange, but which are nevertheless central aspects of the interaction, and which therefore must be taken into account in HCI design, including its linguistic aspects, forming the focus of this chapter.},
  booktitle = {New Directions in Third Wave Human-Computer Interaction: Volume 1 - Technologies / edited by Michael Filimowicz, Veronika Tzankova.},
  author = {Borin, Lars and Edlund, Jens},
  year = {2018},
  publisher = {Springer International Publishing},
  address = {Cham},
  ISBN = {978-3-319-73355-5},
  pages = {251--264},
}

@incollection{Lyngfelt-Benjamin2018-269085,
  title = {Constructicography at work: Theory meets practice in the Swedish constructicon},
  abstract = {This chapter addresses central topics in constructicography from the viewpoint of the Swedish constructicon project (SweCcn), focusing on practical constructicon development. The full process of construction description is described and discussed, from selection via corpus analysis to finished constructicon entry and beyond, towards structuring the set of entries into a network. Particular attention is given to the description format and the treatment of constructional variation. A main theme in the chapter is the interdependence and alignment of SweCcn and related resources, on the one hand in the local context, notably the infrastructure of Språkbanken (the Swedish language bank), and on the other hand with respect to corresponding resources for other languages. 
Of key concern is the relation to FrameNet, both the Swedish and other framenets, and a major section is devoted to conditions for linking constructions and frames.},
  booktitle = {Constructicography: Constructicon development across languages / edited by Benjamin Lyngfelt, Lars Borin, Kyoko Ohara and Tiago Timponi Torrent },
  author = {Lyngfelt, Benjamin and Bäckström, Linnéa and Borin, Lars and Ehrlemark, Anna and Rydstedt, Rudolf},
  year = {2018},
  publisher = {John Benjamins},
  address = {Amsterdam},
  ISBN = {9789027263865},
  pages = {41--106},
}

@incollection{Borin-Lars2018-269084,
  title = {Linguistics vs. language technology in constructicon building and use},
  abstract = {In this chapter, we describe the close interaction of linguists and language technologists in the Swedish constructicon project. This kind of collaboration is not so common today, because of the way that language technology has developed in recent decades, but in our case the collaboration has been very successful, and constituted a genuine instance of cross-fertilization, where an evolving language technology infrastructure and a computational lexical macroresource described in the chapter has formed an integral part of the Swedish constructicon development environment, while at the same time the structured linguistic knowledge described in the constructicon has informed the language technology making up the infrastructure.},
  booktitle = {Constructicography: Constructicon development across languages / edited by Benjamin Lyngfelt, Lars Borin, Kyoko Ohara, Tiago Timponi Torrent},
  author = {Borin, Lars and Dannélls, Dana and Gruzitis, Normunds},
  year = {2018},
  publisher = {John Benjamins},
  address = {Amsterdam},
  ISBN = {9789027263865},
  pages = {229--253},
}

@book{Lyngfelt-Benjamin2018-269082,
  title = {Constructicography: Constructicon development across languages},
  abstract = {In constructionist theory, a constructicon is an inventory of constructions making up the full set of linguistic units in a 
language. In applied practice, it is a set of construction descriptions – a “dictionary of constructions”. The development of constructicons in the latter sense typically means combining principles of both construction grammar and lexicography, and is probably best characterized as a blend between the two traditions. We call this blend constructicography. The present volume is a comprehensive introduction to the emerging field of constructicography. After a general introduction follow six chapters presenting constructicon projects for English, German, Japanese, Brazilian Portuguese, Russian, and Swedish, respectively, often in relation to a framenet of the language. In addition, there is a chapter addressing the interplay between linguistics and language technology in constructicon development, and a final chapter exploring the prospects for interlingual constructicography. This is the first major publication devoted to constructicon development and it should be particularly relevant for those interested in construction grammar, frame semantics, lexicography, the relation between grammar and lexicon, or linguistically informed language technology. },
  author = {Lyngfelt, Benjamin and Borin, Lars and Ohara, Kyoko and Torrent, Tiago Timponi},
  year = {2018},
  publisher = {John Benjamins},
  address = {Amsterdam},
  ISBN = {9789027263865},
}

@inProceedings{Malm-Per2018-269086,
  title = {LingFN: Towards a framenet for the linguistics domain},
  abstract = {Framenets and frame semantics have proved useful for a number of natural language processing (NLP) tasks. However, in this connection framenets have often been criticized for limited coverage. A proposed reasonable-effort solution to this problem is to develop domain-specific (sublanguage) framenets to complement the corresponding general-language framenets for particular NLP tasks, and in the literature we find such initiatives covering, e.g., medicine, soccer, and tourism. 
In this paper, we report on our experiments and first results on building a framenet to cover the terms and concepts encountered in descriptive linguistic grammars. A contextual statistics based approach is used to judge the polysemous nature of domain-specific terms, and to design new domain-specific frames. The work is part of a more extensive research undertaking where we are developing NLP methodologies for automatic extraction of linguistic information from traditional linguistic descriptions to build typological databases, which otherwise are populated using a labor intensive manual process.},
  booktitle = {Proceedings of the LREC 2018 workshop: International FrameNet Workshop 2018 – Multilingual Framenets and Constructicons},
  author = {Malm, Per and Virk, Shafqat and Borin, Lars and Saxena, Anju},
  year = {2018},
  publisher = {ELRA},
  address = {Miyazaki},
  ISBN = {979-10-95546-04-7},
}

@inProceedings{Alfter-David2018-275364,
  title = {From Language Learning Platform to Infrastructure for Research on Language Learning},
  abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus-based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. 
The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. 
The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, } @misc{Pilán-Ildikó2018-275358, title = {Proceedings of the 7th Workshop on NLP for Computer Assisted Language Learning (NLP4CALL 2018), SLTC, Stockholm, 7th November 2018 }, abstract = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. 
This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.}, author = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars}, year = {2018}, publisher = {Linköping University Electronic Press}, adress = {Linköpings universitet}, ISBN = {978-91-7685-173-9}, } @inProceedings{Adesam-Yvonne2018-273839, title = {The Eukalyptus Treebank of Written Swedish}, booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018}, author = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus}, year = {2018}, } @inProceedings{Alfter-David2018-275364, title = {From Language Learning Platform to Infrastructure for Research on Language Learning}, abstract = {Lärka is an Intelligent Computer-Assisted Language Learning (ICALL) platform developed at Språkbanken, as a flexible and a valuable source of additional learning material (e.g. via corpus- based exercises) and a support tool for both teachers and L2 learners of Swedish and students of (Swedish) linguistics. 
Nowadays, Lärka is being adapted into a central building block in an emerging second language research infrastructure within a larger context of the text-based research infrastructure developed by the national Swedish Language bank, Språkbanken, and SWE-CLARIN.}, booktitle = {Proceedings of CLARIN-2018 conference, Pisa, Italy}, author = {Alfter, David and Borin, Lars and Pilán, Ildikó and Lindström Tiedemann, Therese and Volodina, Elena}, year = {2018}, }

% NLP4CALL 2018 workshop volume. Entry type and citation key are kept as exported so existing citations resolve unchanged.
@misc{Pilán-Ildikó2018-275358,
  title     = {Proceedings of the 7th Workshop on {NLP} for Computer Assisted Language Learning ({NLP4CALL} 2018), {SLTC}, {Stockholm}, 7th {November} 2018},
  abstract  = {The primary goal of the workshop series on Natural Language Processing for Computer-Assisted Language Learning (NLP4CALL) is to create a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The latter includes, among others, insights from Second Language Acquisition (SLA) research, on the one hand, and promoting the development of “Computational SLA” through setting up Second Language research infrastructure(s), on the other. The intersection of Natural Language Processing (or Language Technology / Computational Linguistics) and Speech Technology with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has given the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition theories and practices, as well as knowledge of second language pedagogy and didactics. This workshop invites therefore a wide range of ICALL-relevant research, including studies where NLP-enriched tools are used for testing SLA and pedagogical theories, and vice versa, where SLA theories, pedagogical practices or empirical data are modeled in ICALL tools. The NLP4CALL workshop series is aimed at bringing together competencies from these areas for sharing experiences and brainstorming around the future of the field.},
  author    = {Pilán, Ildikó and Volodina, Elena and Alfter, David and Borin, Lars},
  year      = {2018},
  publisher = {Linköping University Electronic Press},
  address   = {Linköpings universitet},
  isbn      = {978-91-7685-173-9},
}

@inproceedings{Adesam-Yvonne2018-273839,
  title     = {The {Eukalyptus} Treebank of Written {Swedish}},
  booktitle = {Seventh Swedish Language Technology Conference (SLTC), Stockholm, 7–9 November 2018},
  author    = {Adesam, Yvonne and Bouma, Gerlof and Johansson, Richard and Borin, Lars and Forsberg, Markus},
  year      = {2018},
}

% Korp 6 reports: the issuing department is stored in institution, the field the techreport type requires.
@techreport{Hammarstedt-Martin2017-256056,
  title       = {{Korp} 6 - Användarmanual},
  author      = {Hammarstedt, Martin and Borin, Lars and Forsberg, Markus and Roxendal, Johan and Schumacher, Anne and Öhrman, Maria},
  year        = {2017},
  institution = {Institutionen för svenska språket, Göteborgs universitet},
}

@techreport{Hammarstedt-Martin2017-256055,
  title       = {{Korp} 6 - Technical Report},
  author      = {Hammarstedt, Martin and Roxendal, Johan and Öhrman, Maria and Borin, Lars and Forsberg, Markus and Schumacher, Anne},
  year        = {2017},
  institution = {Institutionen för svenska språket, Göteborgs universitet},
}

% NOTE(review): the article below carries volume, number and pages but no journal field, which the article type requires; the journal name is not recorded here and needs to be supplied from the publication record.
@article{Pilán-Ildikó2017-260382,
  title    = {Candidate sentence selection for language learning exercises: From a comprehensive framework to an empirical evaluation},
  abstract = {We present a framework and its implementation relying on Natural Language Processing methods, which aims at the identification of exercise item candidates from corpora. 
The hybrid system combining heuristics and machine learning methods includes a number of relevant selection criteria. We focus on two fundamental aspects: linguistic complexity and the dependence of the extracted sentences on their original context. Previous work on exercise generation addressed these two criteria only to a limited extent, and a refined overall candidate sentence selection framework appears also to be lacking. In addition to a detailed description of the system, we present the results of an empirical evaluation conducted with language teachers and learners which indicate the usefulness of the system for educational purposes. We have integrated our system into a freely available online learning platform.},
  author   = {Pilán, Ildikó and Volodina, Elena and Borin, Lars},
  year     = {2017},
  volume   = {57},
  number   = {3},
  pages    = {67--91},
}

@inproceedings{Borin-Lars2017-261157,
  title     = {{Swe-Clarin}: Language resources and technology for Digital Humanities},
  abstract  = {CLARIN is a European Research Infrastructure Consortium (ERIC), which aims at (a) making extensive language-based materials available as primary research data to the humanities and social sciences (HSS); and (b) offering state-of-the-art language technology (LT) as an e-research tool for this purpose, positioning CLARIN centrally in what is often referred to as the digital humanities (DH). The Swedish CLARIN node Swe-Clarin was established in 2015 with funding from the Swedish Research Council. In this paper, we describe the composition and activities of Swe-Clarin, aiming at meeting the requirements of all HSS and other researchers whose research involves using text and speech as primary research data, and spreading the awareness of what Swe-Clarin can offer these research communities. We focus on one of the central means for doing this: pilot projects conducted in collaboration between HSS researchers and Swe-Clarin, together formulating a research question, the addressing of which requires working with large language-based materials. Four such pilot projects are described in more detail, illustrating research on rhetorical history, second-language acquisition, literature, and political science. A common thread to these projects is an aspiration to meet the challenge of conducting research on the basis of very large amounts of textual data in a consistent way without losing sight of the individual cases making up the mass of data, i.e., to be able to move between Moretti’s “distant” and “close reading” modes. While the pilot projects clearly make substantial contributions to DH, they also reveal some needs for more development, and in particular a need for document-level access to the text materials. As a consequence of this, work has now been initiated in Swe-Clarin to meet this need, so that Swe-Clarin together with HSS scholars investigating intricate research questions can take on the methodological challenges of big-data language-based digital humanities.},
  booktitle = {Digital Humanities 2016. Extended Papers of the International Symposium on Digital Humanities (DH 2016) Växjö, Sweden, November, 7-8, 2016. Edited by Koraljka Golub, Marcelo Milra. Vol-2021},
  author    = {Borin, Lars and Tahmasebi, Nina and Volodina, Elena and Ekman, Stefan and Jordan, Caspar and Viklund, Jon and Megyesi, Beáta and Näsman, Jesper and Palmér, Anne and Wirén, Mats and Björkenstam, Kristina and Grigonyte, Gintare and Gustafson Capková, Sofia and Kosiński, Tomasz},
  year      = {2017},
  publisher = {M. Jeusfeld c/o Redaktion Sun SITE, Informatik V, RWTH Aachen.},
  address   = {Aachen},
}

@inproceedings{Virk-Shafqat2017-261789,
  title     = {Automatic extraction of typological linguistic features from descriptive grammars},
  abstract  = {The present paper describes experiments on automatically extracting typological linguistic features of natural languages from traditional written descriptive grammars. The feature-extraction task has high potential value in typological, genealogical, historical, and other related areas of linguistics that make use of databases of structural features of languages. Until now, extraction of such features from grammars has been done manually, which is highly time and labor consuming and becomes prohibitive when extended to the thousands of languages for which linguistic descriptions are available. The system we describe here starts from semantically parsed text over which a set of rules are applied in order to extract feature values. We evaluate the system’s performance on the manually curated Grambank database as the gold standard and report the first measures of precision and recall for this problem.},
  booktitle = {Text, Speech, and Dialogue 20th International Conference, TSD 2017, Prague, Czech Republic, August 27-31, 2017, Proceedings / edited by Kamil Ekštein, Václav Matoušek.},
  author    = {Virk, Shafqat and Borin, Lars and Saxena, Anju and Hammarström, Harald},
  year      = {2017},
  publisher = {Springer International Publishing},
  address   = {Cham},
  isbn      = {978-3-319-64205-5},
}

% The percent sign and ampersand in the abstract below are escaped so the text stays intact when a backend copies the field into LaTeX.
@inproceedings{Volodina-Elena2017-262848,
  title     = {{SVALex}. {En} andraspråksordlista med {CEFR}-nivåer},
  abstract  = {När man planerar att utveckla en språkkurs i ett andra- eller främmandespråk (L2) ställs man inför utmaningen att definiera vilket ordförråd inlärarna behöver tillägna sig. Forskning inom andraspråksinlärning tyder på att läsaren behöver kunna 95–98 \% av löporden i en text för att förstå den (Laufer \& Ravenhorst-Kalovski 2010). 
Sådana studier är användbara för att uppskatta storleken på det ordförråd som behövs för att tillägna sig innehållet i en text, men de ger ingen närmare metodologisk vägledning för den som vill utveckla nivåstrukturerade läromedel eller kurser för andraspråksundervisning. Speciellt tydligt är detta inom CALL, Computer-Assisted Language Learning, där läromaterial (t.ex. övningar) genereras automatiskt, och behöver elektroniska resurser som kunskapskälla. Man kan istället angripa problemet från andra hållet. Om man har en samling nivåklassificerade texter för andraspråksinlärare kan man utifrån dem bygga ordlistor där varje ord är placerat på en färdighetsskala. Om man känner till den förutsatta färdighetsnivån hos läsaren, kan man helt enkelt anta att den textnivå där ett ord dyker upp första gången också anger ordets svårighetsgrad. SVALex är ett lexikon som har byggts enligt den principen. Resursen ska kunna användas av inlärare och lärare i svenska som andraspråk, men även av lexikografer, av kursutvecklare och provkonstruktörer samt av dem som likt oss själva ägnar sig åt utveckling av språkteknologibaserade datorstöd för språkinlärning och språktestning. SVALex utgör en vidareutveckling i förhållande till tidigare lexikonresurser för svenska som andraspråk (se avsnitt 2), genom att den konsekvent relaterar de 15 681 lexikoningångarna till en vida använd färdighetsskala för andra- och främmandespråksinlärning, Europarådets gemensamma europeiska referensram för språk (Common European Framework of Reference, i fortsättningen refererad till som CEFR) (Council of Europe 2001; Skolverket 2009). Nivåklassningen av lexikonenheterna i SVALex görs på basis av deras distribution i COCTAILL, en korpus innehållande lärobokstexter i svenska som andraspråk, där lärare har placerat in varje text i någon av CEFR-nivåerna (Volodina et al. 2014).},
  booktitle = {Svenskans beskrivning. 35, Förhandlingar vid trettiofemte sammankomsten : Göteborg 11–13 maj 2016 / Redigerad av Emma Sköldberg, Maia Andréasson, Henrietta Adamsson Eryd, Filippa Lindahl, Sven Lindström, Julia Prentice \& Malin Sandberg},
  author    = {Volodina, Elena and Borin, Lars and Pilán, Ildikó and François, Thomas and Tack, Annaïs},
  year      = {2017},
  publisher = {Göteborgs universitet},
  address   = {Göteborg},
  isbn      = {978-91-87850-64-6},
}

% Preface (pages i--vi) to the joint workshop volume; the volume itself is the entry that follows.
@misc{Volodina-Elena2017-262846,
  title    = {Preface. Proceedings of the Joint 6th Workshop on {NLP} for Computer Assisted Language Learning and 2nd Workshop on {NLP} for Research on Language Acquisition at {NoDaLiDa} 2017, {Gothenburg}, 22nd {May} 2017},
  abstract = {For the second year in a row we brought two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together. The goal of organizing joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications. The resulting volume covers a variety of topics from the two fields and - hopefully - showcases the challenges and achievements in the field. The seven papers in this volume cover native language identification in learner writings, using syntactic complexity development in language learner language to identify reading comprehension texts of appropriate level, exploring the potential of parallel corpora to predict mother-language specific problem areas for learners of another language, tools for learning languages - both well-resourced ones such as English as well as endangered or under-resourced ones such as Yakut and Võro, as well as exploring the potential of automatically identifying and correcting word-level errors in Swedish learner writing.},
  author   = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonyte, Gintare and Nilsson Björkenstam, Kristina},
  year     = {2017},
  volume   = {30},
  pages    = {i--vi},
}

@misc{Volodina-Elena2017-262838,
  title     = {Proceedings of the Joint 6th Workshop on {NLP} for Computer Assisted Language Learning and 2nd Workshop on {NLP} for Research on Language Acquisition at {NoDaLiDa} 2017, {Gothenburg}, 22nd {May} 2017},
  abstract  = {For the second year in a row we have brought the two related themes of NLP for Computer-Assisted Language Learning and NLP for Language Acquisition together under one umbrella. 
The goal of organizing these joint workshops is to provide a meeting place for researchers working on language learning issues including both empirical and experimental studies and NLP-based applications.},
  author = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Grigonytė, Gintarė and Nilsson Björkenstam, Kristina},
  year = {2017},
  publisher = {Linköping University Press},
  address = {Linköping, Sweden},
  ISBN = {978-91-7685-502-7},
}
@techreport{Borin-Lars2016-233768,
  title = {A free cloud service for OCR / En fri molntjänst för OCR},
  author = {Borin, Lars and Bouma, Gerlof and Dannélls, Dana},
  year = {2016},
  institution = {University of Gothenburg},
  address = {Göteborg},
}
@inProceedings{Tahmasebi-Nina2016-233899,
  title = {SWE-CLARIN – the Swedish CLARIN project – aims and activities},
  booktitle = {Digital Humanities in the Nordic countries, Oslo, March 15-17 2016},
  author = {Tahmasebi, Nina and Borin, Lars and Jordan, Caspar and Ekman, Stefan},
  year = {2016},
  pages = {122--123},
}
@inProceedings{Viklund-Jon2016-236738,
  title = {How can big data help us study rhetorical history?},
  abstract = {Rhetorical history is traditionally studied through rhetorical treatises or selected rhetorical practices, for example the speeches of major orators. Although valuable sources, these do not give us the answers to all our questions. Indeed, focus on a few canonical works or the major historical key figures might even lead us to reproduce cultural self-identifications and false generalizations. However, thanks to increasing availability of relevant digitized texts, we are now at a point where it is possible to see how new research questions can be formulated – and how old research questions can be addressed from a new angle or established results verified – on the basis of exhaustive collections of data, rather than small samples, but where a methodology has not yet established itself. 
The aim of this paper is twofold: (1) We wish to demonstrate the usefulness of large-scale corpus studies (“text mining”) in the field of rhetorical history, and hopefully point to some interesting research problems and how they can be analyzed using “big-data” methods. (2) In doing this, we also aim to make a contribution to method development in e-science for the humanities and social sciences, and in particular in the framework of CLARIN. },
  booktitle = {Linköping Electronic Conference Proceedings, No. 123. Edited by Koenraad De Smedt. Selected Papers from the CLARIN Annual Conference 2015. October 14–16, 2015, Wroclaw, Poland},
  author = {Viklund, Jon and Borin, Lars},
  year = {2016},
  volume = {123},
  ISBN = {978-91-7685-765-6},
  pages = {79--93},
}
@article{Rehm-Georg2016-237609,
  title = {The strategic impact of META-NET on the regional, national and international level},
  abstract = {This article provides an overview of the dissemination work carried out in META-NET from 2010 until 2015; we describe its impact on the regional, national and international level, mainly with regard to politics and the funding situation for LT topics. 
The article documents the initiative’s work throughout Europe in order to boost progress and innovation in our field.},
  author = {Rehm, Georg and Uszkoreit, Hans and Ananiadou, Sophia and Bel, Núria and Bielevičienė, Audronė and Borin, Lars and Branco, António and Budin, Gerhard and Calzolari, Nicoletta and Daelemans, Walter and Garabík, Radovan and Grobelnik, Marko and García-Mateo, Carmen and van Genabith, Josef and Hajič, Jan and Hernáez, Inma and Judge, John and Koeva, Svetla and Krek, Simon and Krstev, Cvetana and Lindén, Krister and Magnini, Bernardo and Mariani, Joseph and McNaught, John and Melero, Maite and Monachini, Monica and Moreno, Asunción and Odijk, Jan and Ogrodniczuk, Maciej and Pęzik, Piotr and Piperidis, Stelios and Przepiórkowski, Adam and Rögnvaldsson, Eiríkur and Rosner, Mike and Pedersen, Bolette Sandford and Skadiņa, Inguna and De Smedt, Koenraad and Tadić, Marko and Thompson, Paul and Tufiş, Dan and Váradi, Tamás and Vasiļjevs, Andrejs and Vider, Kadri and Zabarskaitė, Jolanta},
  year = {2016},
  volume = {50},
  number = {2},
  pages = {351--374},
}
@article{Adesam-Yvonne2016-237884,
  title = {Språkteknologi för svenska språket genom tiderna},
  abstract = {Språkbanken, the Swedish Language Bank, is a language technology research unit at the Department of Swedish, University of Gothenburg. We develop language resources – such as corpora, lexical resources, and analytical tools – for all variants of Swedish, from Old Swedish laws to present-day social media. Historical texts offer exciting theoretical and methodological challenges for language technology because they often defy the assumption inherent in most automatic analysis tools that the texts contain a standardized written language. In this article, we describe our ongoing work on the development of annotated historical corpora, as well as our efforts on linking various resources (both corpora and lexical resources). 
This research advances the state of the art of language technology as well as enables new research for scholars in other disciplines.},
  author = {Adesam, Yvonne and Ahlberg, Malin and Andersson, Peter and Borin, Lars and Bouma, Gerlof and Forsberg, Markus},
  year = {2016},
  volume = {76},
  number = {Studier i svensk språkhistoria 13},
  pages = {65--87},
}
@inProceedings{Borin-Lars2016-238147,
  title = {Towards interactive visualization of public discourse in time and space},
  abstract = {We report on a proof-of-concept study where we (1) apply NLP tools for extracting political-discourse topics from a large Swedish Twitter dataset; and (2) design an interactive spatiotemporal visualization application allowing humanities and social-science scholars to explore how the tweet topics vary over space and time.},
  booktitle = {Linköping Electronic Conference Proceedings},
  author = {Borin, Lars and Kosiński, Tomasz},
  year = {2016},
  volume = {126},
  ISBN = {978-91-7685-733-5},
  pages = {1--7},
}
@comment{NOTE(review): the next record (key Eide-StianRødven2016-238134) has the same title, authors, year and ISBN as Eide-StianRødven2016-250073 elsewhere in this file; it looks like a duplicate of that conference paper -- confirm and merge if so.}
@misc{Eide-StianRødven2016-238134,
  title = {The Swedish Culturomics Gigaword Corpus: A One Billion Word Swedish Reference Dataset for NLP},
  author = {Eide, Stian Rødven and Tahmasebi, Nina and Borin, Lars},
  year = {2016},
  volume = {126},
  number = {002},
  ISBN = {978-91-7685-733-5},
  pages = {8--12},
}
@incollection{Borin-Lars2016-246607,
  title = {Lexikografi för maskiner och lexikografi för människor},
  booktitle = {Framtidens lexikografi: Rapport från ett symposium i Göteborg 5 oktober 2012},
  author = {Borin, Lars},
  year = {2016},
  publisher = {Meijerbergs institut vid Göteborgs universitet},
  address = {Göteborg},
  ISBN = {978-91-87850-01-1},
  pages = {9--27},
}
@inProceedings{Ahlberg-Malin2016-246072,
  title = {Karp: Språkbanken’s Open Lexical Infrastructure},
  booktitle = {Globalex 2016},
  author = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Olsson, Olof and Schumacher, Anne and Uppström, Jonatan},
  year = {2016},
}
@inProceedings{Ahlberg-Malin2016-246063,
  title = {Språkbanken’s Open 
Lexical Infrastructure},
  abstract = {Karp is an open lexical infrastructure and a web based tool for searching, exploring and developing lexical resources. Språkbanken currently hosts a number of lexicons in Karp and on-going work aims at broadening the type of resources that can be developed in the system. This abstract gives a short overview of Karp's basic functionality, and describes some current projects and on-going work.},
  booktitle = {SLTC 2016. The Sixth Swedish Language Technology Conference, Umeå University, 17-18 November, 2016},
  author = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Olsson, Olof and Schumacher, Anne and Uppström, Jonatan},
  year = {2016},
}
@inProceedings{Borin-Lars2016-246053,
  title = {Sparv: Språkbanken’s corpus annotation pipeline infrastructure},
  abstract = {Sparv is Språkbanken's corpus annotation pipeline infrastructure. The easiest way to use the pipeline is from its web interface with a plain text document. The pipeline uses in-house and external tools on the text to segment it into sentences and paragraphs, tokenise, tag parts-of-speech, look up in dictionaries and analyse compounds. The pipeline can also be run using a web API with XML results, and it is run locally at Språkbanken to prepare the documents in Korp, our corpus search tool. While the most sophisticated support is for modern Swedish, the pipeline supports 15 languages.},
  booktitle = {SLTC 2016. The Sixth Swedish Language Technology Conference, Umeå University, 17-18 November, 2016},
  author = {Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Rosén, Dan and Schäfer, Roland and Schumacher, Anne},
  year = {2016},
}
@misc{Volodina-Elena2016-248087,
  title = {Preface. 
Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition at SLTC, Umeå, 16th November 2016},
  abstract = {The joint workshop on Natural Language Processing (NLP) for Computer-Assisted Language Learning (CALL) & NLP for Language Acquisition (LA) – shorthand NLP4CALL&LA – is an effort to provide a debate space and collaboration between two closely related areas. Both focus on language acquisition, related resources and technologies, that can support research of the language learning process as well as aim to bring interdisciplinary advantage to the field. Individual workshop areas are outlined below. The area of NLP4CALL is applied in essence, where tools, algorithms, and ready-to-use programs play an important role. It has a traditional focus on second or foreign language learning, and the target age group of school children or older. The intersection of Natural Language Processing and Speech Technology, with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has provided the name for this area of research – Intelligent CALL, ICALL. As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition (SLA) theories and practices, second language assessment, as well as knowledge of L2 pedagogy and didactics. The workshop on Language Processing for Research in Language Acquisition (NLP4LA) broadens the scope of the joint workshop to also include theoretical, empirical, and experimental investigation of first, second and bilingual language acquisition. NLP4LA aims to foster collaboration between the NLP, linguistics, psychology and cognitive science communities. The workshop is targeted at anyone interested in the relevance of computational techniques for first, second and bilingual language acquisition. 
The joint workshop series on NLP4CALL&LA has arisen in 2016 and has become a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in systems supporting language learning and research around it, and exploring the theoretical and methodological issues arising during language acquisition. },
  author = {Volodina, Elena and Grigonytė, Gintarė and Pilán, Ildikó and Nilsson Björkenstam, Kristina and Borin, Lars},
  year = {2016},
  number = {130},
  pages = {i--viii},
}
@misc{Volodina-Elena2016-248081,
  title = {Proceedings of the joint workshop on NLP for Computer Assisted Language Learning and NLP for Language Acquisition at SLTC, Umeå, 16th November 2016},
  abstract = {The joint workshop on Natural Language Processing (NLP) for Computer-Assisted Language Learning (CALL) & NLP for Language Acquisition (LA) – shorthand NLP4CALL&LA – is an effort to provide a debate space and collaboration between two closely related areas. Both focus on language acquisition, related resources and technologies, that can support research of the language learning process as well as aim to bring interdisciplinary advantage to the field. Individual workshop areas are outlined below. The area of NLP4CALL is applied in essence, where tools, algorithms, and ready-to-use programs play an important role. It has a traditional focus on second or foreign language learning, and the target age group of school children or older. The intersection of Natural Language Processing and Speech Technology, with Computer-Assisted Language Learning (CALL) brings “understanding” of language to CALL tools, thus making CALL intelligent. This fact has provided the name for this area of research – Intelligent CALL, ICALL. 
As the definition suggests, apart from having excellent knowledge of Natural Language Processing and/or Speech Technology, ICALL researchers need good insights into second language acquisition (SLA) theories and practices, second language assessment, as well as knowledge of L2 pedagogy and didactics. The workshop on Language Processing for Research in Language Acquisition (NLP4LA) broadens the scope of the joint workshop to also include theoretical, empirical, and experimental investigation of first, second and bilingual language acquisition. NLP4LA aims to foster collaboration between the NLP, linguistics, psychology and cognitive science communities. The workshop is targeted at anyone interested in the relevance of computational techniques for first, second and bilingual language acquisition. The joint workshop series on NLP4CALL&LA has arisen in 2016 and has become a meeting place for researchers working on the integration of Natural Language Processing and Speech Technologies in systems supporting language learning and research around it, and exploring the theoretical and methodological issues arising during language acquisition.},
  author = {Volodina, Elena and Grigonytė, Gintarė and Pilán, Ildikó and Nilsson Björkenstam, Kristina and Borin, Lars},
  year = {2016},
  publisher = {Linköping University Electronic Press},
  address = {Linköping},
  ISBN = {978-91-7685-633-8},
}
@inProceedings{Eide-StianRødven2016-250073,
  title = {The Swedish Culturomics Gigaword Corpus: A One Billion Word Swedish Reference Dataset for NLP},
  abstract = {In this paper we present a dataset of contemporary Swedish containing one billion words. The dataset consists of a wide range of sources, all annotated using a state-of-the-art corpus annotation pipeline, and is intended to be a static and clearly versioned dataset. This will facilitate reproducibility of experiments across institutions and make it easier to compare NLP algorithms on contemporary Swedish. 
The dataset contains sentences from 1950 to 2015 and has been carefully designed to feature a good mix of genres balanced over each included decade. The sources include literary, journalistic, academic and legal texts, as well as blogs and web forum entries.},
  booktitle = {Linköping Electronic Conference Proceedings. Digital Humanities 2016. From Digitization to Knowledge 2016: Resources and Methods for Semantic Processing of Digital Works/Texts, July 11, 2016, Krakow, Poland},
  author = {Eide, Stian Rødven and Tahmasebi, Nina and Borin, Lars},
  year = {2016},
  publisher = {Linköping University Electronic Press},
  address = {Linköping},
  ISBN = {978-91-7685-733-5},
}
@inProceedings{Borin-Lars2016-253952,
  title = {Towards a Big Data View on South Asian Linguistic Diversity},
  abstract = {South Asia with its rich and diverse linguistic tapestry of hundreds of languages, including many from four major language families, and a long history of intensive language contact, provides rich empirical data for studies of linguistic genealogy, linguistic typology, and language contact. South Asia is often referred to as a linguistic area, a region where, due to close contact and widespread multilingualism, languages have influenced one another to the extent that both related and unrelated languages are more similar on many linguistic levels than we would expect. However, with some rare exceptions, most studies are largely impressionistic, drawing examples from a few languages. In this paper we present our ongoing work aiming at turning the linguistic material available in Grierson’s Linguistic Survey of India (LSI) into a digital language resource, a database suitable for a broad array of linguistic investigations of the languages of South Asia. 
In addition to this, we aim to contribute to the methodological development of large-scale comparative linguistics drawing on digital language resources, by exploring NLP techniques for extracting linguistic information from free-text language descriptions of the kind found in the LSI.},
  booktitle = {WILDRE-3 – 3rd Workshop on Indian Language Data: Resources and Evaluation},
  author = {Borin, Lars and Virk, Shafqat and Saxena, Anju},
  year = {2016},
  publisher = {ELRA},
  address = {Paris},
}
@inProceedings{Cap-Fabienne2016-254388,
  title = {SWORD: Towards Cutting-Edge Swedish Word Processing},
  abstract = {Despite many years of research on Swedish language technology, there is still no well-documented standard for Swedish word processing covering the whole spectrum from low-level tokenization to morphological analysis and disambiguation. SWORD is a new initiative within the SWE-CLARIN consortium aiming to develop documented standards for Swedish word processing. In this paper, we report on a pilot study of Swedish tokenization, where we compare the output of six different tokenizers on four different text types. 
For one text type (Wikipedia articles), we also compare to the tokenization produced by six manual annotators.},
  booktitle = {Proceedings of the Sixth Swedish Language Technology Conference (SLTC) Umeå University, 17-18 November, 2016},
  author = {Cap, Fabienne and Adesam, Yvonne and Ahrenberg, Lars and Borin, Lars and Bouma, Gerlof and Forsberg, Markus and Kann, Viggo and Östling, Robert and Smith, Aaron and Wirén, Mats and Nivre, Joakim},
  year = {2016},
}
@incollection{Rama-Taraka2015-197484,
  title = {Comparative evaluation of string similarity measures for automatic language classification},
  booktitle = {Sequences in Language and Text},
  author = {Rama, Taraka and Borin, Lars},
  year = {2015},
  publisher = {De Gruyter Mouton},
  ISBN = {978-3-11-036287-9},
}
@article{Tahmasebi-Nina2015-212969,
  title = {Visions and open challenges for a knowledge-based culturomics},
  abstract = {The concept of culturomics was born out of the availability of massive amounts of textual data and the interest to make sense of cultural and language phenomena over time. Thus far however, culturomics has only made use of, and shown the great potential of, statistical methods. In this paper, we present a vision for a knowledge-based culturomics that complements traditional culturomics. We discuss the possibilities and challenges of combining knowledge-based methods with statistical methods and address major challenges that arise due to the nature of the data; diversity of sources, changes in language over time as well as temporal dynamics of information in general. 
We address all layers needed for knowledge-based culturomics, from natural language processing and relations to summaries and opinions.},
  author = {Tahmasebi, Nina and Borin, Lars and Capannini, Gabriele and Dubhashi, Devdatt and Exner, Peter and Forsberg, Markus and Gossen, Gerhard and Johansson, Fredrik and Johansson, Richard and Kågebäck, Mikael and Mogren, Olof and Nugues, Pierre and Risse, Thomas},
  year = {2015},
  volume = {15},
  number = {2-4},
  pages = {169--187},
}
@inProceedings{Borin-Lars2015-217351,
  title = {Here be dragons? The perils and promises of inter-resource lexical-semantic mapping},
  abstract = {Lexical-semantic knowledges sources are a stock item in the language technologist’s toolbox, having proved their practical worth in many and diverse natural language processing (NLP) applications. In linguistics, lexical semantics comes in many flavors, but in the NLP world, wordnets reign more or less supreme. There has been some promising work utilizing Roget-style thesauruses instead, but wider experimentation is hampered by the limited availability of such resources. The work presented here is a first step in the direction of creating a freely available Roget-style lexical resource for modern Swedish. Here, we explore methods for automatic disambiguation of interresource mappings with the longer-term goal of utilizing similar techniques for automatic enrichment of lexical-semantic resources.},
  booktitle = {Linköping Electronic Conference Proceedings. Semantic resources and semantic annotation for Natural Language Processing and the Digital Humanities. 
Workshop at NODALIDA, May 11, 13-18 2015, Vilnius},
  author = {Borin, Lars and Nieto Piña, Luis and Johansson, Richard},
  year = {2015},
  volume = {112},
  ISBN = {978-91-7519-049-5},
  pages = {1--11},
}
@book{Volodina-Elena2015-226574,
  title = {Proceedings of the 4th workshop on NLP for computer assisted language learning at Nodalida 2015, Vilnius, 11th May, 2015},
  author = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
  year = {2015},
  publisher = {Linköping University Press},
  address = {Linköping},
  ISBN = {978-91-7519-036-5},
}
@article{Rama-Taraka2014-187121,
  title = {N-Gram Approaches to the Historical Dynamics of Basic Vocabulary},
  author = {Rama, Taraka and Borin, Lars},
  year = {2014},
  volume = {21},
  number = {1},
  pages = {50--64},
}
@article{Borin-Lars2014-192931,
  title = {Kulturomik: Att spana efter språkliga och kulturella förändringar i digitala textarkiv},
  author = {Borin, Lars and Johansson, Richard},
  year = {2014},
}
@inProceedings{Borin-Lars2014-193085,
  title = {Swesaurus; or, The Frankenstein Approach to Wordnet Construction},
  abstract = {Swesaurus is a freely available (under a CC-BY license) Swedish wordnet under construction, built primarily by scavenging and recycling information from a number of existing lexical resources. Among its more unusual characteristics are graded lexical-semantic relations and inclusion of all parts of speech, not only open-class items. },
  booktitle = {Proceedings of the Seventh Global WordNet Conference (GWC 2014)},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2014},
  ISBN = {978-9949-32-492-7},
}
@article{Borin-Lars2014-198286,
  title = {Geographic visualization of place names in Swedish literary texts},
  abstract = {This article describes the development of a geographical information system (GIS) at Språkbanken as part of a visualization solution to be used in an archive of historical Swedish literary texts. 
The research problems we are aiming to address concern orthographic and morphological variation, missing place names, and missing place name coordinates. Some of these problems form a central part in the development of methods and tools for the automatic analysis of historical Swedish literary texts at our research unit. We discuss the advantages and challenges of covering large-scale spelling variation in place names from different sources and in generating maps with focus on different time periods. },
  author = {Borin, Lars and Dannélls, Dana and Olsson, Leif-Jöran},
  year = {2014},
  volume = {29},
  number = {3},
  pages = {400--404},
}
@inProceedings{Borin-Lars2014-198549,
  title = {Bring vs. MTRoget: Evaluating automatic thesaurus translation},
  booktitle = {Proceedings of LREC 2014, May 26-31, 2014 Reykjavik, Iceland},
  author = {Borin, Lars and Allwood, Jens and de Melo, Gerard},
  year = {2014},
  publisher = {European Language Resources Association},
  ISBN = {978-2-9517408-8-4},
}
@inProceedings{Borin-Lars2014-198551,
  title = {Linguistic landscaping of South Asia using digital language resources: Genetic vs. 
areal linguistics},
  booktitle = {Proceedings of LREC, May 26-31, 2014, Reykjavik, Iceland},
  author = {Borin, Lars and Saxena, Anju and Rama, Taraka and Comrie, Bernard},
  year = {2014},
  ISBN = {978-2-9517408-8-4},
  pages = {3137--3144},
}
@inProceedings{Rehm-Georg2014-198556,
  title = {The strategic impact of META-NET on the regional, national and international level},
  booktitle = {Proceedings of LREC 2014, 26-31 May, Reykjavik, Iceland},
  author = {Rehm, Georg and Uszkoreit, Hans and Ananiadou, Sophia and Bel, Núria and Bielevičienė, Audronė and Borin, Lars and Branco, António and Budin, Gerhard and Calzolari, Nicoletta and Daelemans, Walter and Garabík, Radovan and Grobelnik, Marko and García-Mateo, Carmen and van Genabith, Josef and Hajič, Jan and Hernáez, Inma and Judge, John and Koeva, Svetla and Krek, Simon and Krstev, Cvetana and Lindén, Krister and Magnini, Bernardo and Mariani, Joseph and McNaught, John and Melero, Maite and Monachini, Monica and Moreno, Asunción and Odijk, Jan and Ogrodniczuk, Maciej and Pęzik, Piotr and Piperidis, Stelios and Przepiórkowski, Adam and Rögnvaldsson, Eiríkur and Rosner, Michael and Pedersen, Bolette Sandford and Skadiņa, Inguna and De Smedt, Koenraad and Tadić, Marko and Thompson, Paul and Tufiș, Dan and Váradi, Tamás and Vasiļjevs, Andrejs and Vider, Kadri and Zabarskaitė, Jolanta},
  year = {2014},
  ISBN = {978-2-9517408-8-4},
  pages = {1517--1524},
}
@inProceedings{Volodina-Elena2014-201885,
  title = {A flexible language learning platform based on language resources and web services},
  abstract = {We present Lärka, the language learning platform of Språkbanken (the Swedish Language Bank). It consists of an exercise generator which reuses resources available through Språkbanken: mainly Korp, the corpus infrastructure, and Karp, the lexical infrastructure. 
Through Lärka we reach new user groups – students and teachers of Linguistics as well as second language learners and their teachers – and this way bring Språkbanken's resources in a relevant format to them. Lärka can therefore be viewed as a case of a real-life language resource evaluation with end users. In this article we describe Lärka's architecture, its user interface, and the five exercise types that have been released for users so far. The first user evaluation following in-class usage with students of linguistics, speech therapy and teacher candidates are presented. The outline of future work concludes the paper.},
  booktitle = {Proceedings of LREC 26-31 May 2014, Reykjavik, Iceland},
  author = {Volodina, Elena and Pilán, Ildikó and Borin, Lars and Lindström Tiedemann, Therese},
  year = {2014},
  ISBN = {978-2-9517408-8-4},
  pages = {3973--3978},
}
@incollection{Ribeck-JudyCarola2014-201965,
  title = {Lexical Bundles in Swedish Secondary School Textbooks},
  abstract = {The present paper describes the process of identifying lexical bundles, i.e., frequently recurring word sequences such as by means of and in the end of, in secondary school history and physics textbooks. In its determination of finding genuine lexical bundles, i.e. the word boundaries between lexical bundles and surrounding arbitrary words, it proposes a new approach to come to terms with the problem of extracting overlapping bundles of different lengths. The results of the structural classification indicate that history uses more NP/PP-based and less dependent-clause-based bundles than physics. The comparative analysis manages to restrict this difference to the referential function. History almost only refers to phrases, i.e. within clauses, while physics much more tends to make references across clauses. 
The article also includes a report on an extension of the study, ongoing work where the automatic identification of multi-word expressions in general is in focus.},
  booktitle = {Human Language Technology Challenges for Computer Science and Linguistics 5th Language and Technology Conference, LTC 2011, Poznań, Poland, November 25--27, 2011, Revised Selected Papers / edited by Zygmunt Vetulani, Joseph Mariani.},
  author = {Ribeck, Judy Carola and Borin, Lars},
  year = {2014},
  publisher = {Springer International Publishing},
  volume = {2014},
  number = {XVI},
  address = {Cham},
  ISBN = {978-3-319-08958-4},
  pages = {238--249},
}
@article{Borin-Lars2014-202127,
  title = {Introduction: Constructions and frames meet language technology},
  author = {Borin, Lars and de Melo, Gerard and Friberg Heppin, Karin and Torrent, Tiago Timponi},
  year = {2014},
  volume = {6},
  number = {1},
  pages = {1--8},
}
@inProceedings{Borin-Lars2014-204731,
  title = {Representing Swedish Lexical Resources in RDF with lemon},
  abstract = {The paper presents an ongoing project which aims to publish Swedish lexical-semantic resources using Semantic Web and Linked Data technologies. 
In this article, we highlight the practical conversion methods and challenges of converting three of the Swedish language resources in RDF with lemon.},
  booktitle = {Proceedings of the ISWC 2014 Posters \& Demonstrations Track a track within the 13th International Semantic Web Conference (ISWC 2014)},
  author = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and McCrae, John P.},
  year = {2014},
  volume = {1272},
  pages = {329--332},
}
@book{Volodina-Elena2014-206135,
  title = {Proceedings of the third workshop on NLP for computer-assisted language learning at SLTC 2014, Uppsala University},
  abstract = {The workshop series on NLP for Computer-Assisted Language Learning (NLP4CALL) is a meeting place for researchers working on the integration of Natural Language Processing and Speech technologies in CALL systems and exploring the theoretical and methodological issues arising in this connection. The papers in the proceedings volume from the third NLP4CALL workshop cover three main topic areas: resources for development of ICALL applications (e.g., learner corpora and coursebook corpora), tools and algorithms for the analysis of learner language (e.g., focusing on collocations, reading tasks, cloze items, pronunciation, spelling, level classification of learner production), and the generation of learning materials (e.g., exercise generators).},
  author = {Volodina, Elena and Borin, Lars and Pilán, Ildikó},
  year = {2014},
  publisher = {Linköping University Press},
  address = {Linköping},
  ISBN = {978-91-7519-175-1},
}
@article{Forsberg-Markus2014-3,
  title = {From construction candidates to constructicon entries: An experiment using semi-automatic methods for identifying constructions in corpora},
  journal = {Constructions and Frames},
  author = {Forsberg, Markus and Johansson, Richard and Bäckström, Linnéa and Borin, Lars and Lyngfelt, Benjamin and Olofsson, Joel and Prentice, Julia},
  year = {2014},
  volume = {6},
  number = {1},
  url = 
{http://www.jbe-platform.com/content/journals/10.1075/cf.6.1.07for},
  pages = {114--135},
}
@comment{NOTE(review): the next record (key Forsberg-Markus2014-208123) has the same title, authors, year, volume and pages as Forsberg-Markus2014-3 directly above; it looks like a duplicate -- confirm and merge if so. The journal name below is taken from that sibling record.}
@article{Forsberg-Markus2014-208123,
  title = {From construction candidates to constructicon entries: An experiment using semi-automatic methods for identifying constructions in corpora},
  abstract = {We present an experiment where natural language processing tools are used to automatically identify potential constructions in a corpus. The experiment was conducted as part of the ongoing efforts to develop a Swedish constructicon. Using an automatic method to suggest constructions has advantages not only for efficiency but also methodologically: it forces the analyst to look more objectively at the constructions actually occurring in corpora, as opposed to focusing on “interesting” constructions only. As a heuristic for identifying potential constructions, the method has proved successful, yielding about 200 (out of 1,200) highly relevant construction candidates.},
  journal = {Constructions and Frames},
  author = {Forsberg, Markus and Johansson, Richard and Bäckström, Linnéa and Borin, Lars and Lyngfelt, Benjamin and Olofsson, Joel and Prentice, Julia},
  year = {2014},
  volume = {6},
  number = {1},
  pages = {114--135},
}
@inProceedings{Lyngfelt-Benjamin2014-208457,
  title = {Ett svenskt konstruktikon. Grammatik möter lexikon},
  booktitle = {Svenskans beskrivning : Förhandlingar vid Trettiotredje sammankomsten för svenskans beskrivning. Helsingfors den 15–17 maj 2013},
  author = {Lyngfelt, Benjamin and Borin, Lars and Bäckström, Linnéa and Forsberg, Markus and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia and Uppström, Jonatan},
  year = {2014},
  volume = {33},
  ISBN = {978-951-51-0120-4},
  pages = {268--279},
}
@inProceedings{Kokkinakis-Dimitrios2014-209800,
  title = {HFST-SweNER. 
A New NER Resource for Swedish},
  abstract = {Named entity recognition (NER) is a knowledge-intensive information extraction task that is used for recognizing textual mentions of entities that belong to a predefined set of categories, such as locations, organizations and time expressions. NER is a challenging, difficult, yet essential preprocessing technology for many natural language processing applications, and particularly crucial for language understanding. NER has been actively explored in academia and in industry especially during the last years due to the advent of social media data. This paper describes the conversion, modeling and adaptation of a Swedish NER system from a hybrid environment, with integrated functionality from various processing components, to the Helsinki Finite-State Transducer Technology (HFST) platform. This new HFST-based NER (HFST-SweNER) is a full-fledged open source implementation that supports a variety of generic named entity types and consists of multiple, reusable resource layers, e.g., various n-gram-based named entity lists (gazetteers).},
  booktitle = {Proceedings of the 9th edition of the Language Resources and Evaluation Conference (LREC), Reykjavik 26 - 31 May 2014.},
  author = {Kokkinakis, Dimitrios and Niemi, Jyrki and Hardwick, Sam and Lindén, Krister and Borin, Lars},
  year = {2014},
  ISBN = {978-2-9517408-8-4},
  pages = {2537--2543},
}
% Entry above: the third author was stored all in lower case ("hardwick, sam"), which
% styles print as written; the name is now capitalised like every other author.

@inProceedings{Ahlberg-Malin2014-210083,
  title = {Swedish FrameNet++ The Beginning of the End and the End of the Beginning},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13-14 November 2014},
  author = {Ahlberg, Malin and Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Friberg Heppin, Karin and Johansson, Richard and Kokkinakis, Dimitrios and Olsson, Leif-Jöran and Uppström, Jonatan},
  year = {2014},
}

@inProceedings{Adesam-Yvonne2014-211376,
  title = {Koala – Korp’s Linguistic Annotations Developing an
infrastructure for text-based research with high-quality annotations},
  booktitle = {Proceedings of the Fifth Swedish Language Technology Conference, Uppsala, 13-14 November 2014},
  author = {Adesam, Yvonne and Borin, Lars and Bouma, Gerlof and Forsberg, Markus and Johansson, Richard},
  year = {2014},
}

@inProceedings{Dannélls-Dana2013-178095,
  title = {MapServer for Swedish Language Technology},
  abstract = {The MapServer application used by the Swedish Language Bank provides new opportunities for visualizing geographical information found in its large repository of written texts, in particular literary texts. The application is capable of performing coordinate search on the basis of recognized place names and rendering both static and dynamic maps that display their geographical locations. },
  booktitle = {Digital Humanities},
  author = {Dannélls, Dana and Borin, Lars and Olsson, Leif-Jöran},
  year = {2013},
}

@inProceedings{Ahlberg-Malin2013-178355,
  title = {Korp and Karp – a bestiary of language resources: the research infrastructure of Språkbanken},
  abstract = {A central activity in Språkbanken, an R&D unit at the University of Gothenburg, is the systematic construction of a research infrastructure based on interoperability and widely accepted standards for metadata and data. The two main components of this infrastructure deal with text corpora and with lexical resources. For modularity and flexibility, both components have a backend, or server-side part, accessed through an API made up of a set of well-defined web services. This means that there can be any number of different user interfaces to these components, corresponding, e.g., to different research needs. Here, we will demonstrate the standard corpus and lexicon search interfaces, designed primarily for linguistic searches: Korp and Karp.},
  booktitle = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway.
NEALT Proceedings Series 16},
  author = {Ahlberg, Malin and Borin, Lars and Forsberg, Markus and Hammarstedt, Martin and Olsson, Leif-Jöran and Olsson, Olof and Roxendal, Johan and Uppström, Jonatan},
  year = {2013},
  number = {16},
  pages = {429--433},
}

@inProceedings{Bäckström-Linnéa2013-178351,
  title = {Automatic identification of construction candidates for a Swedish constructicon},
  abstract = {We present an experiment designed for extracting construction candidates for a Swedish constructicon from text corpora. We have explored the use of hybrid n-grams with the practical goal to discover previously undescribed partially schematic constructions. The experiment was successful, in that quite a few new constructions were discovered. The precision is low, but as a push-button tool for construction discovery, it has proven a valuable tool for the work on a Swedish constructicon.},
  booktitle = {Proceedings of the workshop on lexical semantic resources for NLP at NODALIDA 2013, May 22-24, 2013, Oslo, Norway. NEALT Proceedings Series 19},
  author = {Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Prentice, Julia and Sköldberg, Emma},
  year = {2013},
  pages = {2--11},
}

@inProceedings{Pedersen-Bolette2013-178357,
  title = {Nordic and Baltic wordnets aligned and compared through “WordTies”},
  abstract = {During the last few years, extensive wordnets have been built locally for the Nordic and Baltic languages applying very different compilation strategies. The aim of the present investigation is to consolidate and examine these wordnets through an alignment via Princeton Core WordNet and thereby compare them along the measures of taxonomical structure, synonym structure, and assigned relations to approximate to a best practice. A common web interface and visualizer “WordTies” is developed to facilitate this purpose. Four bilingual wordnets are automatically processed and evaluated exposing interesting differences between the wordnets.
Even if the alignments are judged to be of a good quality, the precision of the translations vary due to considerable differences in hyponymy depth and interpretation of the synset. All seven monolingual and four bilingual wordnets as well as WordTies have been made available via META-SHARE through the META-NORD project.},
  booktitle = {Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013), May 22–24, 2013, Oslo University, Norway. NEALT Proceedings Series 16},
  author = {Pedersen, Bolette and Borin, Lars and Forsberg, Markus and Kahusk, Neeme and Lindén, Krister and Niemi, Jyrki and Nisbeth, Niklas and Nygaard, Lars and Orav, Heili and Rögnvaldsson, Eiríkur and Seaton, Mitchel and Vider, Kadri and Voionmaa, Kaarlo},
  year = {2013},
  number = {16},
  pages = {147--162},
}

% Next two entries: the publisher city was stored under the misspelled field name "adress",
% which BibTeX silently ignores; it is now in the standard "address" field.
@book{Borin-Lars2013-184757,
  title = {Approaches to Measuring Linguistic Differences},
  abstract = {The present volume collects contributions addressing different aspects of the measurement of linguistic differences, a topic which probably is as old as language itself but at the same time has acquired renewed interest over the last decade or so, reflecting a rapid development of data-intensive computing in all fields of research, including linguistics.},
  author = {Borin, Lars and Saxena, Anju},
  year = {2013},
  publisher = {De Gruyter Mouton},
  address = {Berlin},
  ISBN = {978-3-11-030525-8},
}

% The editor list below used ";" as separator, which BibTeX reads as one single person;
% names are now in "Last, First" form joined by "and".
@incollection{Borin-Lars2013-184760,
  title = {The Intercontinental Dictionary Series – a rich and principled database for language comparison},
  booktitle = {Approaches to Measuring Linguistic Differences},
  editor = {Borin, Lars and Saxena, Anju},
  author = {Borin, Lars and Comrie, Bernard and Saxena, Anju},
  year = {2013},
  publisher = {De Gruyter Mouton},
  address = {Berlin},
  ISBN = {978-3-11-030525-8},
  pages = {285--302},
}

@incollection{Borin-Lars2013-184758,
  title = {The why and how of measuring linguistic differences},
  booktitle = {Approaches to Measuring Linguistic Differences / edited by
Lars Borin, Anju Saxena},
  author = {Borin, Lars},
  year = {2013},
  publisher = {De Gruyter Mouton},
  address = {Berlin},
  ISBN = {978-3-11-030525-8},
  pages = {3--26},
}

% Entries above and below: the publisher city was stored under the misspelled field name
% "adress", which BibTeX silently ignores; it is now in the standard "address" field.
@incollection{Saxena-Anju2013-184759,
  title = {Carving Tibeto-Kanauri by its joints: Using basic vocabulary lists for genetic grouping of languages},
  booktitle = {Approaches to Measuring Linguistic Differences},
  author = {Saxena, Anju and Borin, Lars},
  year = {2013},
  publisher = {De Gruyter Mouton},
  address = {Berlin},
  ISBN = {978-3-11-030525-8},
  pages = {175--198},
}

@inProceedings{Borin-Lars2013-186032,
  title = {The lexical editing system of Karp},
  abstract = {Karp is the open lexical infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure has three main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; (2) to publish the resources, making them searchable and downloadable; and (3) to offer advanced editing functionalities. An important feature of the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 23 Swedish lexical resources. The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish.},
  booktitle = {Kosem, I., Kallas, J., Gantar, P., Krek, S., Langemets, M., Tuulik, M. (eds.) 2013. Electronic lexicography in the 21st century: thinking outside the paper.
Proceedings of the eLex 2013 conference, 17-19 October 2013, Tallinn, Estonia.},
  author = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Olsson, Olof and Uppström, Jonatan},
  year = {2013},
  volume = {2013},
  ISBN = {978-961-93594-0-2},
  pages = {503--516},
}
% Entry above: a stray leading blank inside the ISBN value was removed so the identifier
% matches exactly in lookups. The title of the entry below lost a trailing blank.

@inProceedings{Sköldberg-Emma2013-186041,
  title = {Between Grammars and Dictionaries: a Swedish Constructicon},
  abstract = {This paper introduces the Swedish Constructicon (SweCxn), a database of Swedish constructions currently under development. We also present a small study of the treatment of constructions in Swedish (paper) dictionaries, thus illustrating the need for a constructionist approach, and discuss three different methods used to identify potential constructions for inclusion in the constructicon. SweCxn is a freely available electronic resource, with a particular focus on semi-general linguistic patterns of the type that are difficult to account for from a purely lexicographic or a purely grammatical perspective, and which therefore have tended to be neglected in both dictionaries and grammars. Far from being a small set of borderline cases, such constructions are both numerous and common. They are also quite problematic for second language acquisition as well as LT applications. Accordingly, various kinds of multi-word units have received more attention in recent years, not least from a lexicographic perspective. The coverage, however, is only partial, and the productivity of many constructions is hard to capture from a lexical viewpoint. To identify constructions for SweCxn, we use a combination of methods, such as working from existing construction descriptions for Swedish and other languages, applying LT tools to discover recurring patterns in texts, and extrapolating constructional information from dictionaries. },
  booktitle = {Kosem, I., Kallas, J., Gantar, P., Krek, S., Langemets, M., Tuulik, M. (eds.) 2013.
Electronic lexicography in the 21st century: thinking outside the paper. Proceedings of the eLex 2013 conference, 17-19 October 2013, Tallinn, Estonia. Ljubljana/Tallinn: Trojina, Institute for Applied Slovene Studies/Eesti Keele Instituut.},
  author = {Sköldberg, Emma and Bäckström, Linnéa and Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Olsson, Leif-Jöran and Prentice, Julia and Rydstedt, Rudolf and Tingsell, Sofia and Uppström, Jonatan},
  year = {2013},
  pages = {310--327},
}

@article{Borin-Lars2013-187063,
  title = {Close encounters of the fifth kind: Some linguistic and computational aspects of the Swedish FrameNet++ project},
  abstract = {The Swedish FrameNet++ (SweFN++) project aims at developing an integrated Swedish lexical macro-resource to be used primarily in language technology R&D to build natural language processing (NLP) applications. Most of the component resources making up SweFN++ are existing digital lexical resources; in their case the central project effort is directed at making them interoperable on as many levels as possible. An important new resource being created in the project is a Swedish framenet. Now a sister project is starting with the aim of adding a Swedish constructicon (SweCxn) to the macro-resource. In this paper, we discuss some theoretical and conceptual issues which have arisen in the course of our work on the SweFN++ and the planning of the SweCxn, in the close encounter between the practical requirements of NLP and the theory and practice of linguistic – lexical and grammatical – description.
},
  journal = {Veredas},
  author = {Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin},
  year = {2013},
  volume = {17},
  number = {1},
  pages = {28--43},
}
% Entry above (key ending in 187063): the required journal field was missing, so the
% reference printed with volume and pages but no venue. NOTE(review): journal name supplied
% from the published record of the article; confirm against the source database.

@article{Borin-Lars2013-188604,
  title = {SALDO: a touch of yin to WordNet's yang},
  abstract = {The English-language Princeton WordNet (PWN) and some wordnets for other languages have been extensively used as lexical–semantic knowledge sources in language technology applications, due to their free availability and their size. The ubiquitousness of PWN-type wordnets tends to overshadow the fact that they represent one out of many possible choices for structuring a lexical-semantic resource, and it could be enlightening to look at a differently structured resource both from the point of view of theoretical–methodological considerations and from the point of view of practical text processing requirements. The resource described here—SALDO—is such a lexical–semantic resource, intended primarily for use in language technology applications, and offering an alternative organization to PWN-style wordnets. We present our work on SALDO, compare it with PWN, and discuss some implications of the differences.
We also describe an integrated infrastructure for computational lexical resources where SALDO forms the central component.},
  journal = {Language Resources and Evaluation},
  author = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
  year = {2013},
  volume = {47},
  number = {4},
  pages = {1191--1211},
}
% Entry above (key ending in 188604): the required journal field was missing, so the
% reference printed with volume and pages but no venue. NOTE(review): journal name supplied
% from the published record of the article; confirm against the source database.

% Next entry: the publisher city was stored under the misspelled field name "adress",
% which BibTeX silently ignores; it is now in the standard "address" field.
@book{Volodina-Elena2013-188675,
  title = {Proceedings of the second workshop on NLP for computer-assisted language learning at NODALIDA 2013 May 22-24, 2013, Oslo, Norway},
  author = {Volodina, Elena and Borin, Lars and Loftsson, Hrafn},
  year = {2013},
  publisher = {Linköping University Press},
  address = {Linköping, Sweden},
  ISBN = {978-91-7519-588-9},
}

@inProceedings{Borin-Lars2013-188846,
  title = {Mining semantics for culturomics: towards a knowledge-based approach},
  abstract = {The massive amounts of text data made available through the Google Books digitization project have inspired a new field of big-data textual research. Named culturomics, this field has attracted the attention of a growing number of scholars over recent years. However, initial studies based on these data have been criticized for not referring to relevant work in linguistics and language technology. This paper provides some ideas, thoughts and first steps towards a new culturomics initiative, based this time on Swedish data, which pursues a more knowledge-based approach than previous work in this emerging field. The amount of new Swedish text produced daily and older texts being digitized in cultural heritage projects grows at an accelerating rate. These volumes of text being available in digital form have grown far beyond the capacity of human readers, leaving automated semantic processing of the texts as the only realistic option for accessing and using the information contained in them.
The aim of our recently initiated research program is to advance the state of the art in language technology resources and methods for semantic processing of Big Swedish text and focus on the theoretical and methodological advancement of the state of the art in extracting and correlating information from large volumes of Swedish text using a combination of knowledge-based and statistical methods.},
  booktitle = {2013 ACM International Workshop on Mining Unstructured Big Data Using Natural Language Processing, UnstructureNLP 2013, Held at 22nd ACM International Conference on Information and Knowledge Management, CIKM 2013; San Francisco, CA; United States; 28 October 2013 through 28 October 2013},
  author = {Borin, Lars and Dubhashi, Devdatt and Forsberg, Markus and Johansson, Richard and Kokkinakis, Dimitrios and Nugues, Pierre},
  year = {2013},
  ISBN = {978-1-4503-2415-1},
  pages = {3--10},
}

% Next two entries: the publisher city was stored under the misspelled field name "adress",
% which BibTeX silently ignores; it is now in the standard "address" field.
@book{Borin-Lars2013-190260,
  title = {Proceedings of the workshop on lexical semantic resources for NLP at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
  author = {Borin, Lars and Fjeld, Ruth Vatvedt and Forsberg, Markus and Nimb, Sanni and Nugues, Pierre and Pedersen, Bolette Sandford},
  year = {2013},
  publisher = {Linköping University Electronic Press},
  address = {Linköping},
  ISBN = {978-91-7519-586-5},
}

@book{Eyþórsson-Þórhallur2013-190256,
  title = {Proceedings of the workshop on computational historical linguistics at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
  author = {Eyþórsson, Þórhallur and Borin, Lars and Haug, Dag and Rögnvaldsson, Eiríkur},
  year = {2013},
  publisher = {Linköping University Electronic Press},
  address = {Linköping},
  ISBN = {978-91-7519-587-2},
}

% Next entry: the first author's given name is spelled "Koenraad" in the other records of
% this file; the author field now agrees. The citation key is left unchanged so existing
% citations keep resolving.
@book{DeSmedt-Koenrad2013-190263,
  title = {Proceedings of the workshop on Nordic language research infrastructure at NODALIDA 2013, May 22-24, 2013, Oslo, Norway},
  author = {De Smedt, Koenraad and Borin, Lars and Lindén, Krister and Maegaard, Bente and Rögnvaldsson, Eiríkur and Vider, Kadri},
  year = {2013},
publisher = {Linköping University Electronic Press},
  address = {Linköping},
  ISBN = {978-91-7519-585-8},
}
% Entry above: the publisher city was stored under the misspelled field name "adress",
% which BibTeX silently ignores; it is now in the standard "address" field.

@inProceedings{Skadina-Inguna2013-194532,
  title = {Baltic and Nordic parts of the European linguistic infrastructure},
  booktitle = {71. Proceedings of the 19th Nordic Conference of Computational Linguistics (NODALIDA 2013) 22-24, May 2013 Oslo, Norway},
  author = {Skadina, Inguna and Vasiljevs, Andrejs and Borin, Lars and Lindén, Krister and Losnegaard, Gyri and Pedersen, Bolette Sandford and Rozis, Roberts and De Smedt, Koenraad},
  year = {2013},
  ISBN = {978-91-7519-589-6},
  pages = {195--211},
}

% Next entry: the volume field held the status marker "Accepted" rather than a volume, which
% styles would print as a volume; it is dropped (final page numbers are present). The last
% author's given name now matches the spelling "Eiríkur" used in the other records.
@inProceedings{Pedersen-BoletteSandford2012-155599,
  title = {Linking and validating Nordic and Baltic wordnets},
  booktitle = {Proceedings of the 6th International Global Wordnet Conference},
  author = {Pedersen, Bolette Sandford and Borin, Lars and Forsberg, Markus and Lindén, Krister and Orav, Heili and Rögnvaldsson, Eiríkur},
  year = {2012},
  pages = {254--260},
}

@inProceedings{Borin-Lars2012-156079,
  title = {The open lexical infrastructure of Språkbanken},
  abstract = {We present our ongoing work on Karp, Språkbanken’s (the Swedish Language Bank) open lexical infrastructure, which has two main functions: (1) to support the work on creating, curating, and integrating our various lexical resources; and (2) to publish daily versions of the resources, making them searchable and downloadable. An important requirement on the lexical infrastructure is also that we maintain a strong bidirectional connection to our corpus infrastructure. At the heart of the infrastructure is the SweFN++ project with the goal to create free Swedish lexical resources geared towards language technology applications. The infrastructure currently hosts 15 Swedish lexical resources, including historical ones, some of which have been created from scratch using existing free resources, both external and in-house.
The resources are integrated through links to a pivot lexical resource, SALDO, a large morphological and lexical-semantic resource for modern Swedish. SALDO has been selected as the pivot partly because of its size and quality, but also because its form and sense units have been assigned persistent identifiers (PIDs) to which the lexical information in other lexical resources and in corpora are linked.},
  booktitle = {Proceedings of the 8th International Conference on Language Resources and Evaluation : May 23-25, 2012 / eds. Nicoletta Calzolari },
  author = {Borin, Lars and Forsberg, Markus and Olsson, Leif-Jöran and Uppström, Jonatan},
  year = {2012},
  ISBN = {978-2-9517408-7-7},
  pages = {3598--3602},
}

@inProceedings{Borin-Lars2012-156080,
  title = {Korp – the corpus infrastructure of Språkbanken},
  abstract = {We present Korp, the corpus infrastructure of Språkbanken (the Swedish Language Bank). The infrastructure consists of three main components: the Korp corpus pipeline, the Korp backend, and the Korp frontend. The Korp corpus pipeline is used for importing corpora, annotating them, and then exporting the annotated corpora into different formats. An essential feature of the pipeline is the ability to leave existing annotations untouched, both structural and word level annotations, and to use the existing annotations as the foundation of other annotations. The Korp backend consists of a set of REST-based web services for searching in and retrieving information about the corpora. Finally, the Korp frontend is a graphical search interface that interacts with the Korp backend. The interface has been inspired by corpus search interfaces such as SketchEngine, Glossa, and DeepDict, and it uses State Chart XML (SCXML) in order to enable users to bookmark interaction states. We give a functional and technical overview of the three components, followed by a discussion of planned future work. },
  booktitle = {Proceedings of LREC 2012.
Istanbul: ELRA},
  author = {Borin, Lars and Forsberg, Markus and Roxendal, Johan},
  year = {2012},
  pages = {474--478},
}
% Entry above: the volume field held the status marker "Accepted" rather than a volume and
% is dropped (final page numbers are present). The page range used a literal en dash; it is
% now the double hyphen that bibliography styles expect. The same page-range repair is
% applied to the entry below.

@inProceedings{Dannélls-Dana2012-156502,
  title = {Toward language independent methodology for generating artwork descriptions – Exploring FrameNet information},
  abstract = {Today museums and other cultural heritage institutions are increasingly storing object descriptions using semantic web domain ontologies. To make this content accessible in a multilingual world, it will need to be conveyed in many languages, a language generation task which is domain specific and language dependent. This paper describes how semantic and syntactic information such as that provided in a framenet can contribute to solving this task. It is argued that the kind of information offered by such lexical resources enhances the output quality of a multilingual language generation application, in particular when generating domain specific content. },
  booktitle = {EACL 2012 workshop on Language Technology for Cultural Heritage, Social Sciences, and Humanities (LaTeCH)},
  author = {Dannélls, Dana and Borin, Lars},
  year = {2012},
  pages = {18--23},
}

@inProceedings{Borin-Lars2012-157213,
  title = {Transferring Frames: Utilization of Linked Lexical Resources},
  abstract = {In our experiment, we evaluate the transferability of frames from Swedish to Finnish in parallel corpora. We evaluate both the theoretical possibility of transferring frames and the possibility of performing it using available lexical resources. We add the frame information to an extract of the Swedish side of the Kotus and JRC-Acquis corpora using an automatic frame labeler and copy it to the Finnish side. We focus on evaluating the results to get an estimation on how often the parallel sentences can be said to express the same frame. This sheds light to the questions: Are the same situations in the two languages expressed using different frames, i.e.
are the frames transferable even in theory? How well can the frame information of running text be transferred from language to another? },
  booktitle = {Proceedings of the Workshop on Inducing Linguistic Structure Submission (WILS)},
  author = {Borin, Lars and Forsberg, Markus and Johansson, Richard and Muhonen, Kristiina and Purtonen, Tanja and Voionmaa, Kaarlo},
  year = {2012},
  pages = {8--15},
}

@inProceedings{Borin-Lars2012-157338,
  title = {Search Result Diversification Methods to Assist Lexicographers},
  abstract = {We show how the lexicographic task of finding informative and diverse example sentences can be cast as a search result diversification problem, where an objective based on relevance and diversity is maximized. This problem has been studied intensively in the information retrieval community during recent years, and efficient algorithms have been devised. We finally show how the approach has been implemented in a lexicographic project, and describe the relevance and diversity functions used in that context. },
  booktitle = {Proceedings of the 6th Linguistic Annotation Workshop},
  author = {Borin, Lars and Forsberg, Markus and Friberg Heppin, Karin and Johansson, Richard and Kjellandsson, Annika},
  year = {2012},
  pages = {113--117},
}

@incollection{Borin-Lars2012-162377,
  title = {Core vocabulary: A useful but mystical concept in some kinds of linguistics},
  booktitle = {Shall we play the festschrift game ?
Essays on the Occasion of Lauri Carlson's 60th Birthday},
  author = {Borin, Lars},
  year = {2012},
  publisher = {Springer},
  address = {Berlin},
  ISBN = {978-3-642-30772-0},
  pages = {53--65},
}

% Entries above and below: the publisher city was stored under the misspelled field name
% "adress", which BibTeX silently ignores; it is now in the standard "address" field.
@book{Borin-Lars2012-163410,
  title = {Svenska språket i den digitala tidsåldern},
  author = {Borin, Lars and Brandt, Martha and Edlund, Jens and Lindh, Jonas and Parkvall, Mikael},
  year = {2012},
  publisher = {Springer},
  address = {Berlin},
  ISBN = {978-3-642-30831-4},
}

@inProceedings{Lyngfelt-Benjamin2012-163582,
  title = {Adding a constructicon to the Swedish resource network of Språkbanken},
  abstract = {This paper presents the integrated Swedish resource network of Språkbanken in general, and its latest addition – a constructicon – in particular. The constructicon, which is still in its early stages, is a collection of (partially) schematic multi-word units, constructions, developed as an addition to the Swedish FrameNet (SweFN). SweFN and the constructicon are integrated with other parts of Språkbanken, both lexical resources and corpora, through the lexical resource SALDO. In most respects, the constructicon is modeled on its English counterpart in Berkeley, and, thus, following the FrameNet format. The most striking differences are the inclusion of so-called collostructional elements and the treatment of semantic roles, which are defined globally instead of locally as in FrameNet.
Incorporating subprojects such as developing methods for automatic identification of constructions in authentic text on the one hand, and accounting for constructions problematic for L2 acquisition on the other, the approach is highly cross-disciplinary in nature, combining various theoretical linguistic perspectives on construction grammar with language technology, lexicography, and L2 research.},
  booktitle = {11th Conference on Natural Language Processing (KONVENS) Proceedings},
  author = {Lyngfelt, Benjamin and Borin, Lars and Forsberg, Markus and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia},
  year = {2012},
  ISBN = {3-85027-005-X},
  pages = {452--461},
}

@inProceedings{Rama-Taraka2012-164449,
  title = {Properties of phoneme N -grams across the world’s language families},
  abstract = {In this article, we investigate the properties of phoneme N -grams across half of the world’s languages. The sizes of three different N -gram distributions of the world’s language families obey a power law. Further, the N -gram distributions of language families parallel the sizes of the families, which also follow a power law distribution. The correlation between N -gram distributions and language family sizes improves with increasing values of N .
The study also raises some new questions about the use of N -gram distributions in linguistic research, which we hope to be able to investigate in the future.},
  booktitle = {Proceedings of the Fourth Swedish Language Technology Conference (SLTC)},
  author = {Rama, Taraka and Borin, Lars},
  year = {2012},
}

@inProceedings{Volodina-Elena2012-165936,
  title = {Waste not, want not: Towards a system architecture for ICALL based on NLP component re-use},
  booktitle = {Proceedings of the SLTC 2012 workshop on NLP for CALL, Lund, 25th October, 2012},
  author = {Volodina, Elena and Borin, Lars and Loftsson, Hrafn and Arnbjörnsdóttir, Birna and Leifsson, Guðmundur Örn},
  year = {2012},
  pages = {47--58},
}

% Next entry: the publisher city was stored under the misspelled field name "adress",
% which BibTeX silently ignores; it is now in the standard "address" field.
@book{Larsson-Staffan2012-167661,
  title = {From Quantification to Conversation},
  author = {Larsson, Staffan and Borin, Lars},
  year = {2012},
  publisher = {College Publications},
  address = {London},
  ISBN = {978-1-84890-091-2},
}

@inProceedings{Volodina-Elena2012-168523,
  title = {Developing an Open-Source Web-Based Exercise Generator for Swedish},
  abstract = {This paper reports on the ongoing international project System architecture for ICALL and the progress made by the Swedish partner. The Swedish team is developing a web-based exercise generator reusing available annotated corpora and lexical resources. Apart from the technical issues like implementation of the user interface and the underlying processing machinery, a number of interesting pedagogical questions need to be solved, e.g., adapting learner-oriented exercises to proficiency levels; selecting authentic examples of an appropriate difficulty level; automatically ranking corpus examples by their quality; providing feedback to the learner, and selecting vocabulary for training domain-specific, academic or general-purpose vocabulary. In this paper we describe what has been done so far, mention the exercise types that can be generated at the moment as well as describe the tasks left for the future.
},
  booktitle = {CALL: Using, Learning, Knowing. EuroCALL Conference, Gothenburg, Sweden, 22-25 August 2012, Proceedings. Eds. Linda Bradley and Sylvie Thouësny. Research-publishing.net, Dublin, Ireland},
  author = {Volodina, Elena and Borin, Lars},
  year = {2012},
  volume = {2012},
  ISBN = {978-1-908416-03-2},
}

% Next entry: the second author was stored as "Hrafn, Loftsson", i.e. with family and given
% name swapped (the other records in this file have "Loftsson, Hrafn"); now corrected so the
% name sorts and abbreviates properly.
@inProceedings{Volodina-Elena2012-168516,
  title = {Towards a system architecture for ICALL},
  abstract = {In this paper, we present an on-going project whose overall aim is to develop open-source system architecture for supporting ICALL systems that will facilitate re-use of existing NLP tools and resources on a plug-and-play basis. We introduce the project, describe the approaches adopted by the two language teams, and present two applications being developed using the proposed architecture.},
  booktitle = {In G. Biswas et al. (eds), Proceedings of the 20th International Conference on Computers in Education. Singapore: Asia-Pacific Society for Computers in Education},
  author = {Volodina, Elena and Loftsson, Hrafn and Arnbjörnsdóttir, Birna and Borin, Lars and Leifsson, Guðmundur Örn},
  year = {2012},
  volume = {2012},
  ISBN = {978-981-07-4649-0},
}

@inProceedings{Borin-Lars2012-171988,
  title = {Growing a Swedish constructicon in lexical soil},
  booktitle = {Proceedings of the Swedish Language Technology Conference.
Lund, October 24-26, 2012},
  author = {Borin, Lars and Forsberg, Markus and Lyngfelt, Benjamin and Prentice, Julia and Rydstedt, Rudolf and Sköldberg, Emma and Tingsell, Sofia},
  year = {2012},
  pages = {10--11},
}

% Next entry: the publisher city was stored under the misspelled field name "adress",
% which BibTeX silently ignores; it is now in the standard "address" field.
@book{Borin-Lars2012-188679,
  title = {Proceedings of the SLTC 2012 workshop on NLP for CALL},
  author = {Borin, Lars and Volodina, Elena},
  year = {2012},
  publisher = {LiU Electronic Press},
  address = {Linköping},
}

@inProceedings{Borin-Lars2011-140686,
  title = {Semantic Search in Literature as an e-Humanities Research Tool: CONPLISIT – Consumption Patterns and Life-Style in 19th Century Swedish Literature},
  abstract = {We present our ongoing work on language technology-based e-science in the humanities, with a focus on text-based research in the historical sciences. Currently, we are working on the adaptation and integration of lexical resources representing different historical stages of Swedish into a lexical and morphological toolbox that will allow us to develop semantically oriented text search applications for historical research on Swedish text. We describe a semantic search prototype which was built using REST web services from this toolbox as components, and which has been evaluated by historians interested in using digitized 19th century novels as primary data for an historical investigation of the emerging consumer society in 19th century Sweden.},
  booktitle = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
  author = {Borin, Lars and Forsberg, Markus and Ahlberger, Christer},
  year = {2011},
  volume = {11},
  pages = {58--65},
}

@inProceedings{Rama-Taraka2011-140688,
  title = {Estimating Language Relationships from a Parallel Corpus. A Study of the Europarl Corpus},
  abstract = {Since the 1950s, linguists have been using short lists (40–200 items) of basic vocabulary as the central component in a methodology which is claimed to make it possible to automatically calculate genetic relationships among languages.
In the last few years these methods have experienced something of a revival, in that more languages are involved, different distance measures are systematically compared and evaluated, and methods from computational biology are used for calculating language family trees. In this paper, we explore how this methodology can be extended in another direction, by using larger word lists automatically extracted from a parallel corpus using word alignment software. We present preliminary results from using the Europarl parallel corpus in this way for estimating the distances between some languages in the Indo-European language family.},
  booktitle = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
  author = {Rama, Taraka and Borin, Lars},
  year = {2011},
  volume = {11},
  pages = {161--167},
}

% Saxena-Anju2011-140689: conference paper; its abstract runs on past the end
% of this physical line of the file.
@inProceedings{Saxena-Anju2011-140689,
  title = {Dialect Classification in the Himalayas: a Computational Approach},
  abstract = {Linguistic fieldwork data – in the form of basic vocabulary lists – for nine closely related language varieties are compared using an automatic procedure with manual feedback, whose major advantage is its complete consistency.
The results of the vocabulary comparison turn out to be in accord with other linguistic features, making this methodology a promising addition to the toolbox of genetic linguistics.},
  booktitle = {NEALT Proceedings Series (NODALIDA 2011 Conference Proceedings)},
  author = {Saxena, Anju and Borin, Lars},
  year = {2011},
  volume = {11},
  pages = {307--310},
}

@inProceedings{Vasljevs-Andrejs2011-140690,
  title = {META-NORD: Baltic and Nordic Branch of the European Open Linguistic Infrastructure},
  booktitle = {Proceedings of the Nodalida 2011 Workshop on visibility and availability of LT resources},
  author = {Vasiljevs, Andrejs and Pedersen, Bolette Sandford and De Smedt, Koenraad and Borin, Lars and Skadina, Inguna},
  year = {2011},
}

@article{Hammarström-Harald2011-141707,
  title = {Unsupervised learning of morphology},
  author = {Hammarström, Harald and Borin, Lars},
  year = {2011},
  volume = {37},
  number = {2},
  pages = {309--350},
}

% The citation key must not contain a comma (a comma terminates the key), so
% it follows the Surname-GivenYEAR-id pattern used by the other entries.
% For a technical report the issuing body is recorded in the institution field.
@techreport{Borin-Lars2011-7,
  title = {Languages in the European Information Society - Swedish},
  author = {Borin, Lars and Brandt, Martha and Edlund, Jens and Lindh, Jonas and Parkvall, Mikael},
  year = {2011},
  institution = {META-NET DFKI Projektbüro Berlin},
  address = {Berlin},
}

@techreport{Borin-Lars2011-142495,
  title = {Metadata descriptions and other interoperability standards},
  abstract = {An important aim of META-NORD is to upgrade and harmonize national language resources and tools in order to make them interoperable, within languages and across languages, with respect to their data formats and as far as possible also as regards their content.
Since resources and to some extent tools will remain in one location – one of a number of META-NORD centers – the preferred way of accessing and utilizing resources and tools will be through metadata and APIs, allowing the assembly of on-the-fly tool-chains made up of standardized component language technology tools, processing distributed – and in many cases interlinked – language resources in standardized formats.},
  author = {Borin, Lars and Lindh, Jonas and Brandt, Martha and Olsson, Leif-Jöran},
  year = {2011},
}

% Book chapter; the address field holds the publisher's city.
@incollection{Borin-Lars2011-144291,
  title = {A diachronic computational lexical resource for 800 years of Swedish},
  booktitle = {Language technology for cultural heritage},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2011},
  publisher = {Springer},
  address = {Berlin},
  ISBN = {978-3-642-20226-1},
  pages = {41--61},
}

@inProceedings{Skadina-Inguna2011-148648,
  title = {META-NORD: Towards sharing of language resources in Nordic and Baltic countries},
  abstract = {This paper introduces the META-NORD project which develops Nordic and Baltic part of the European open language resource infrastructure. META-NORD works on assembling, linking across languages, and making widely available the basic language resources used by developers, professionals and researchers to build specific products and applications. The goals of the project, overall approach and specific action lines on wordnets, terminology resources and treebanks are described. Moreover, results achieved in first five months of the project, i.e.
language whitepapers, metadata specification and IPR management, are presented.},
  booktitle = {Proceedings of the Workshop on Language Resources, Technology and Services in the Sharing Paradigm},
  author = {Skadina, Inguna and Vasiljevs, Andrejs and Borin, Lars and De Smedt, Koenraad and Lindén, Krister and Rögnvaldsson, Eiríkur},
  year = {2011},
  pages = {107--114},
}

@article{Borin-Lars2011-151331,
  title = {Swesaurus – ett svenskt ordnät med fria tyglar},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2011},
  volume = {18},
  pages = {17--39},
}

% Entries dated 2010 start here.
@inProceedings{Borin-Lars2010-118907,
  title = {Diabase: Towards a diachronic BLARK in support of historical studies},
  booktitle = {Proceedings of LREC 2010},
  author = {Borin, Lars and Forsberg, Markus and Kokkinakis, Dimitrios},
  year = {2010},
}

@inProceedings{Borin-Lars2010-118908,
  title = {From the People’s Synonym Dictionary to fuzzy synsets - first steps},
  booktitle = {Proceedings of the LREC 2010 workshop Semantic relations. Theory and Applications},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2010},
  pages = {18--25},
}

@inProceedings{Wittenburg-Peter2010-118909,
  title = {Resource and service centres as the backbone for a sustainable service infrastructure},
  booktitle = {Proceedings of LREC 2010},
  author = {Wittenburg, Peter and Bel, Nuria and Borin, Lars and Budin, Gerhard and Calzolari, Nicoletta and Hajicova, Eva and Koskenniemi, Kimmo and Lemnitzer, Lothar and Mægaard, Bente and Piasecki, Maciej and Pierrel, Jean-Marie and Piperidis, Stelios and Skadina, Inguna and Tufis, Dan and van Veenendal, Remco and Váradi, Tamás and Wynne, Martin},
  year = {2010},
}

@inProceedings{Borin-Lars2010-110368,
  title = {The past meets the present in Swedish FrameNet++},
  abstract = {The paper is about a recently initiated project which aims at the development of a Swedish FrameNet as an integral part of a larger lexical resource, hence the name “Swedish FrameNet++” (SweFN++).
It focuses on reuse of free electronic resources and their role in the acquisition and population of Swedish frames. After a brief overview of Swedish resources, we reflect on three approaches to recycling the available lexical data in a semi-automatic manner. SweFN++ will be a multi-functional resource supporting research within lexicology and linguistics as well as different applications within computational lexicography and language technology, not to mention e-science.},
  booktitle = {14th EURALEX International Congress},
  author = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  year = {2010},
  pages = {269--281},
}

% Book chapter; the address field holds the publisher's location.
@incollection{Borin-Lars2010-124517,
  title = {Literary onomastics and language technology},
  booktitle = {Literary education and digital learning},
  author = {Borin, Lars and Kokkinakis, Dimitrios},
  year = {2010},
  publisher = {Information Science Reference},
  address = {Hershey - New York},
  ISBN = {978-1-60566-932-8},
  pages = {53--78},
}

@article{Borin-Lars2010-129126,
  title = {Swedish FrameNet++},
  author = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  year = {2010},
}

@article{Borin-Lars2010-129125,
  title = {Beyond the synset: Swesaurus – a fuzzy Swedish wordnet},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2010},
}

@article{Borin-Lars2010-130257,
  title = {Med Zipf mot framtiden - en integrerad lexikonresurs för svensk språkteknologi},
  author = {Borin, Lars},
  year = {2010},
  volume = {17},
  pages = {35--54},
}

% The title uses a plain hyphen only; no invisible soft hyphen after it.
@incollection{Borin-Lars2010-136656,
  title = {Avtryck från WGLN-projekten i forskningen},
  booktitle = {Kunskapens nya världar},
  author = {Borin, Lars},
  year = {2010},
  publisher = {Uppsala universitet, Uppsala Learning Lab},
  address = {Uppsala},
  ISBN = {978-91-506-2189-1},
  pages = {127--133},
}

@inProceedings{Andréasson-Maia2009-102211,
  title = {Swedish CLARIN activities},
  booktitle = {Proceedings of the Nodalida
2009 workshop on CLARIN activities in the Nordic countries. NEALT Proceedings Series},
  author = {Andréasson, Maia and Borin, Lars and Forsberg, Markus and Beskow, Jonas and Carlson, Rolf and Edlund, Jens and Elenius, Kjell and Hellmer, Kahl and House, David and Merkel, Magnus and Forsbom, Eva and Megyesi, Beáta and Eriksson, Anders and Strömqvist, Sven},
  year = {2009},
  volume = {5},
  pages = {1--5},
}

@inProceedings{Borin-Lars2009-102209,
  title = {Linguistic diversity in the information society},
  booktitle = {Proceedings of the SALTMIL 2009 workshop on Information Retrieval and Information Extraction for Less Resourced Languages},
  author = {Borin, Lars},
  year = {2009},
  ISBN = {978-84-692-4940-6},
  pages = {1--7},
}

@inProceedings{Borin-Lars2009-102212,
  title = {All in the family: A comparison of SALDO and WordNet},
  booktitle = {Proceedings of the Nodalida 2009 Workshop on WordNets and other Lexical Semantic Resources - between Lexical Semantics, Lexicography, Terminology and Formal Ontologies. NEALT Proceedings Series},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2009},
  volume = {7},
}

% Technical report: the issuing body goes in the institution field and its
% city in the address field.
@techreport{Borin-Lars2009-102214,
  title = {One in the bush: Low-density language technology},
  author = {Borin, Lars},
  year = {2009},
  institution = {University of Gothenburg},
  address = {Göteborg},
}

@inProceedings{Borin-Lars2009-110343,
  title = {Thinking Green: Toward Swedish FrameNet++},
  abstract = {Access to multi-layered lexical, grammatical and semantic information representing text content is a prerequisite for efficient automatic understanding and generation of natural language. A FrameNet is considered a valuable resource for both linguistics and language technology research that may contribute to the achievement of these goals.
Currently, FrameNet-like resources exist for a few languages, including some domain-specific and multilingual initiatives (Dolbey et al., 2006; Boas, 2009; Uematsu et al., 2009; Venturi et al., 2009), but are unavailable for most languages, including Swedish, although there have been some pilot studies exploring the semi-automatic acquisition of Swedish frames (Johansson \& Nugues, 2006; Borin et al., 2007). At the University of Gothenburg, we are now embarking on a project to build a Swedish FrameNet-like resource. A novel feature of this project is that the Swedish FrameNet will be an integral part of a larger many-faceted lexical resource. Hence the name Swedish FrameNet++ (SweFN++).},
  booktitle = {FrameNet Masterclass and Workshop},
  author = {Borin, Lars and Dannélls, Dana and Forsberg, Markus and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  year = {2009},
}

% The ampersand in the workshop acronym is escaped so the title can be typeset.
@book{Lendvai-Piroska2009-91853,
  title = {Proceedings of the EACL 2009 Workshop on Language Technology and Resources for Cultural Heritage, Social Sciences, Humanities, and Education (LaTeCH -- SHELT\&R 2009)},
  author = {Lendvai, Piroska and Borin, Lars},
  year = {2009},
  publisher = {ACL},
  address = {Athens},
  ISBN = {1-932432-21-3},
}

% Technical report: the issuing body goes in the institution field.
@techreport{Andréasson-Maia2008-102220,
  title = {Habeas Corpus: A survey for SNK - a Swedish national corpus},
  author = {Andréasson, Maia and Borin, Lars and Merkel, Magnus},
  year = {2008},
  institution = {University of Gothenburg},
  address = {Göteborg},
}

@inProceedings{Borin-Lars2008-72502,
  title = {Something old, something new: A computational morphological description of Old Swedish},
  booktitle = {LREC 2008 Workshop on Language Technology for Cultural Heritage Data (LaTeCH 2008)},
  author = {Borin, Lars and Forsberg, Markus},
  year = {2008},
  pages = {9--16},
}

@article{Borin-Lars2008-72506,
  title = {Review of Stig Johansson: Seeing through multilingual corpora: On the use of corpora in contrastive studies},
  author = {Borin, Lars},
  year = {2008},
  volume = {32},
  pages =
{261--267},
}

@article{Borin-Lars2008-110525,
  title = {SALDO 1.0 (Svenskt associationslexikon version 2)},
  author = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
  year = {2008},
}

% Festschrift chapters; the address field holds the publisher's city.
@incollection{Borin-Lars2008-72507,
  title = {Lemma, lexem eller mittemellan? Ontologisk ångest i den digitala domänen},
  booktitle = {Nog ordat? Festskrift till Sven-Göran Malmgren},
  author = {Borin, Lars},
  year = {2008},
  publisher = {University of Gothenburg},
  address = {Göteborg},
  pages = {59--67},
}

@incollection{Borin-Lars2008-72504,
  title = {The hunting of the BLARK - SALDO, a freely available lexical database for Swedish language technology},
  booktitle = {Resourceful language technology. Festschrift in honor of Anna Sågvall Hein},
  author = {Borin, Lars and Forsberg, Markus and Lönngren, Lennart},
  year = {2008},
  publisher = {Uppsala University},
  address = {Uppsala},
  pages = {21--32},
}

@inProceedings{Borin-Lars2007-44951,
  title = {Medical frames as target and tool},
  booktitle = {FRAME 2007: Building Frame Semantics resources for Scandinavian and Baltic languages.
(Nodalida 2007 workshop proceedings)},
  author = {Borin, Lars and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios},
  year = {2007},
  ISBN = {978-91-976939-0-5},
  pages = {11--18},
}

@inProceedings{Borin-Lars2007-44954,
  title = {Naming the past: Named entity and animacy recognition in 19th century Swedish literature},
  booktitle = {ACL 2007 Workshop on Language Technology for Cultural Heritage Data (LaTeCH 2007)},
  author = {Borin, Lars and Kokkinakis, Dimitrios and Olsson, Leif-Jöran},
  year = {2007},
  pages = {1--8},
}

% Technical report: the issuing body goes in the institution field.
@techreport{Borin-Lars2007-53590,
  title = {Empowering the patient with language technology},
  author = {Borin, Lars and Grabar, Natalia and Hallett, Catalina and Hardcastle, David and Toporowska Gronostaj, Maria and Kokkinakis, Dimitrios and Williams, Sandra and Willis, Alistair},
  year = {2007},
  institution = {Göteborg University},
  address = {Göteborg},
}

% The ampersand between the volume editors is escaped so the booktitle can be
% typeset.
@incollection{Borin-Lars2006-33863,
  title = {Supporting lesser-known languages: The promise of language technology},
  booktitle = {Saxena, A. \& Borin, L. (eds). Lesser-known languages of South Asia. Status and policies, case studies and applications of information technology},
  author = {Borin, Lars},
  year = {2006},
  publisher = {Mouton de Gruyter},
  address = {Berlin},
  ISBN = {3-11-018976-3},
  pages = {317--337},
}

@incollection{Borin-Lars2006-33864,
  title = {Vi som går köksvägen: Språkteknologer och korpuslingvister i Litteraturbanken},
  booktitle = {Börjesson, M. (red). Fältanteckningar: Utbildnings- och kultursociologiska texter tillägnade Donald Broady},
  author = {Borin, Lars},
  year = {2006},
  publisher = {Forskningsgruppen för utbildnings- och kultursociologi (ILU), Uppsala universitet},
  address = {Uppsala},
  ISBN = {91-631-8807-4},
  pages = {399--404},
}

@incollection{Borin-Lars2006-44950,
  title = {Sparv i tranedansen eller fisken i vattnet?
Språkteknologi och språklärande},
  booktitle = {Från vision till praktik: Språkutbildning och informationsteknik},
  author = {Borin, Lars},
  year = {2006},
  publisher = {NSHU - Myndigheten för nätverk och samarbete inom högre utbildning},
  address = {Härnösand},
  ISBN = {978-91-975425-8-6},
  pages = {25--49},
}

% Conference presentation with a Swedish-language abstract.
@inProceedings{Borin-Lars2006-116093,
  title = {ITG-plattformen som korpusverktyg},
  abstract = {En genomgång och handfast presentation om hur ITG-plattformen kan användas som korpusverktyg.},
  booktitle = {Fjärde svenska lingvistikkonferensen (Sling 2006), 27–28 april 2006, Stockholm},
  author = {Borin, Lars and Olsson, Leif-Jöran},
  year = {2006},
}

@inProceedings{Markó-Kornél2006-40540,
  title = {Towards a multilingual medical lexicon},
  booktitle = {Proceedings of the American Medical Informatics Association Symposium (AMIA '06)},
  author = {Markó, Kornél and Baud, Robert and Zweigenbaum, Pierre and Borin, Lars and Merkel, Magnus and Schulz, Stefan},
  year = {2006},
  pages = {534--538},
}

@book{Saxena-Anju2006-33862,
  title = {Lesser-known languages of South Asia.
Status and policies, case studies and applications of information technology},
  author = {Saxena, Anju and Borin, Lars},
  year = {2006},
  publisher = {Mouton de Gruyter},
  address = {Berlin},
  ISBN = {3-11-018976-3},
}

% Technical report: the issuing body goes in the institution field. The
% underscore in the title is escaped so the title can be typeset.
@techreport{Åhlfelt-Hans2006-34047,
  title = {Literature Review on Patient\_Friendly Documentation Systems},
  author = {Åhlfelt, Hans and Borin, Lars and Daumke, Philipp and Grabar, Natalia and Hallett, Catalina and Hardcastle, David and Kokkinakis, Dimitrios and Mancini, Clara and Markó, Kornél and Merkel, Magnus and Pietsch, Christian and Power, Richard and Scott, Donia and Silvervarg, Annika and Toporowska Gronostaj, Maria and Williams, Sandra and Willis, Alistair},
  year = {2006},
  institution = {Göteborg University},
  address = {Göteborg},
}

@inProceedings{Baud-Robert2005-33867,
  title = {Interchanging lexical information for a multilingual dictionary},
  booktitle = {AMIA 2005 Proceedings},
  author = {Baud, Robert and Nyström, Mikael and Borin, Lars and Evans, Roger and Schulz, Stefan and Zweigenbaum, Pierre},
  year = {2005},
  pages = {31--35},
}

@article{Borin-Lars2005-33865,
  title = {Mannen är faderns mormor: Svenskt associationslexikon reinkarnerat},
  author = {Borin, Lars},
  year = {2005},
  volume = {12},
  pages = {39--54},
}

@incollection{Borin-Lars2004-33944,
  title = {Grammar, incorporated},
  booktitle = {Henrichsen, P. J. (ed). CALL for the Nordic languages},
  author = {Borin, Lars and Saxena, Anju},
  year = {2004},
  publisher = {Samfundslitteratur},
  address = {Frederiksberg},
  ISBN = {87-593-1176-2},
  pages = {125--145},
}

@incollection{Borin-Lars2004-33945,
  title = {New wine in old skins? A corpus investigation of L1 syntactic transfer in learner language},
  booktitle = {Aston, G., Bernardini, S. \& Stewart, D. (eds).
Corpora and language learners},
  author = {Borin, Lars and Prütz, Klas},
  year = {2004},
  publisher = {John Benjamins},
  address = {Amsterdam},
  ISBN = {90-272-2288-6},
  pages = {67--87},
}

% Yearbook chapter; the address field holds the publisher's city.
@incollection{Borin-Lars2004-33976,
  title = {Language technology resources for less prevalent languages: Will the Münchhausen Model work?},
  booktitle = {Holmboe, H. (ed). Nordisk sprogteknologi 2003. Nordic language technology. Årbog for Nordisk Sprogteknologisk Forskningsprogram 2000-2004},
  author = {Borin, Lars},
  year = {2004},
  publisher = {Museum Tusculanums Forlag},
  address = {København},
  ISBN = {87-7289-997-2},
  pages = {71--82},
}