% Workshop paper in the SBD 2017 proceedings (conference date 19 May 2017).
% Changes from the exported record: author/editor names are in "Last, First"
% form with no escaped braces, proper nouns and acronyms are brace-protected,
% and the `series` field was dropped because it repeated `booktitle` verbatim.
@inproceedings{af822ad88db041289936da8d0416372f,
  author    = {Cao, Tien Duc and Manolescu, Ioana and Tannier, Xavier},
  title     = {Extracting {Linked Data} from Statistic Spreadsheets},
  booktitle = {Proceedings of the International Workshop on Semantic Big Data, {SBD} 2017 - In conjunction with the 2017 {ACM} {SIGMOD/PODS} Conference},
  editor    = {Gruenwald, Le and Groppe, Sven},
  publisher = {Association for Computing Machinery, Inc},
  year      = {2017},
  month     = may,
  day       = {19},
  doi       = {10.1145/3066911.3066914},
  language  = {English},
  keywords  = {Information extraction, Linked data, RDF},
  abstract  = {Statistic data is an important sub-category of open data; it is interesting for many applications, including but not limited to data journalism, as such data is typically of high quality, and reflects (under an aggregated form) important aspects of a society's life such as births, immigration, economy etc. However, such open data is often not published as Linked Open Data (LOD) limiting its usability. We provide a conceptual model for the open data comprised in statistics published by INSEE, the national French economic and societal statistics institute. Then, we describe a novel method for extracting RDF LOD, to populate an instance of this model. We used our method to produce RDF data out of 20k+ Excel spreadsheets, and our validation indicates a 91\% rate of successful extraction.},
  note      = {Publisher Copyright: {\textcopyright} 2017 ACM.; International Workshop on Semantic Big Data, SBD 2017 ; Conference date: 19-05-2017},
}