% Conference paper in the EDBT 2020 proceedings (OpenProceedings.org).
% Author and editor names are stored in "Last, First" form so that multi-word
% given names and surnames need no grouping braces; acronyms are brace-protected
% against style-driven lowercasing.
@inproceedings{664dc2597642412d93a205f7561c0098,
  author    = {Baazizi, Mohamed Amine and Berti, Cl{\'e}ment and Colazzo, Dario and Ghelli, Giorgio and Sartiani, Carlo},
  title     = {Human-in-the-loop schema inference for massive {JSON} datasets},
  booktitle = {Advances in Database Technology - {EDBT} 2020},
  series    = {Advances in Database Technology - {EDBT}},
  editor    = {Bonifati, Angela and Zhou, Yongluan and Vaz Salles, Marcos Antonio and Bohm, Alexander and Olteanu, Dan and Fletcher, George and Khan, Arijit and Yang, Bin},
  publisher = {OpenProceedings.org},
  pages     = {635--638},
  year      = {2020},
  month     = jan,
  day       = {1},
  doi       = {10.5441/002/edbt.2020.82},
  language  = {English},
  note      = {Publisher Copyright: {\textcopyright} 2020 Copyright held by the owner/author(s); 23rd International Conference on Extending Database Technology, EDBT 2020 ; Conference date: 30-03-2020 Through 02-04-2020},
  abstract  = {JSON established itself as a popular data format for representing data whose structure is irregular or unknown a priori. JSON collections are usually massive and schema-less. Inferring a schema describing the structure of these collections is crucial for formulating meaningful queries and for adopting schema-based optimizations. In a recent work, we proposed a Map/Reduce schema inference approach that either infers a compact representation of the input collection or a precise description of every possible shape in the data. Since no level of precision is ideal, it is more appealing to give the analyst the freedom of choosing between different levels of precisions in an interactive fashion. In this paper we describe a schema inference system offering this important functionality.},
}