Skip to main content

BibTeX

% Journal article presenting the corpus: Scientific Data, volume 13, number 1 (2026).
% Accented letters in the author names are written as LaTeX accent escapes so the
% entry sorts and prints correctly under classic 8-bit BibTeX as well as Biber.
@article{elmerot-etal-2026-voices-362492,
	title        = {Voices of formerly enslaved: A new text corpus of narratives by formerly enslaved persons},
	abstract     = {These data consist of newly OCRed and annotated narratives, both autobiographical texts written by, and interviews with, formerly enslaved persons of African descent in the United States of America and the Caribbean, including extensive time-related and geographical metadata. The texts authored by these individuals span from the years 1795 to approximately 1900, while the interviews were conducted in the 1930s. The former are written in standardised English from that time, whereas the latter often are written down in mediated, vernacular form, causing issues in lemmatisation and part-of-speech tagging. The aim is to create openly accessible corpora that can be utilised for the purpose of researching how these formerly enslaved persons described their own lives.},
	journal      = {Scientific Data},
	author       = {Elmerot, Irene and Olsson, Leif-J{\"o}ran and R{\"o}nnb{\"a}ck, Klas},
	year         = {2026},
	volume       = {13},
	number       = {1},
}

% Corpus resource record (2026) for the transcribed and annotated narratives themselves.
% Accented letters are written as LaTeX escapes so the entry sorts and prints
% correctly under classic 8-bit BibTeX as well as Biber.
@misc{elmerot-etal-2026-voices-360700,
	title        = {Voices of formerly Enslaved Corpus},
	abstract     = {A corpus of transcribed and annotated narratives from informants who were formerly enslaved. Texts selected are mainly in vernacular English, but partly also in standard English [eng]. There are two main parts:

Selection of narratives collected in the Federal Writers' Project (FWP) collected in the late 1930s and published in 1941.
Selection of narratives collected as part of the Documenting the American South Collection (DocSouth)
The Corpus has been developed in collaboration with Spr{\aa}kbanken Text.},
	author       = {Elmerot, Irene and Olsson, Leif-J{\"o}ran and R{\"o}nnb{\"a}ck, Klas},
	year         = {2026},
}