% Conference paper "Tuple reconstruction" in the DASFAA 2018 International
% Workshops proceedings (Springer LNCS series).
% Names are stored in "Last, First" form so every style parses them the same way.
% The DOI is stored bare (no LaTeX escapes); DOI-aware styles handle the underscore.
% NOTE(review): month/day (jan 1) look like an export placeholder; the note field
% gives conference dates 21-24 May 2018. Confirm the real publication date.
% NOTE(review): no LNCS volume number is recorded here; add volume once verified.
@inproceedings{34daf094bee74aa785548fb8767b1219,
  author    = {Er, Ngurah Agus Sanjaya and Ba, Mouhamadou Lamine and Abdessalem, Talel and Bressan, St{\'e}phane},
  title     = {Tuple reconstruction},
  booktitle = {Database Systems for Advanced Applications - DASFAA 2018 International Workshops},
  editor    = {Jianxin Li and Lei Zou and Chengfei Liu},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  year      = {2018},
  month     = jan,
  day       = {1},
  pages     = {239--254},
  doi       = {10.1007/978-3-319-91455-8_21},
  isbn      = {9783319914541},
  language  = {English},
  keywords  = {Reconstruction, Set expansion, Truth-finding, Tuples},
  abstract  = {Set of tuples expansion system (STEP) extracts information from the Web in the form of tuples. It builds a graph of entities consisting of Web pages, wrappers, seeds, domains, and candidates as its nodes while the relationships between them as edges. The final weight given for each node after running random walks on the graph is used to order the extracted candidates. Due to the nature of the regular expressions used as wrappers, some of the extracted candidates may contain ``noise'' and therefore can be considered as ``false''. These false candidates may rank higher than the ``true'' ones on the list because they are extracted from many Web pages or produced by many different wrappers. Minimizing these false candidates is necessary to ensure the validity of the result presented. In this research, we propose a method to tackle the aforementioned problem of STEP by reconstructing tuples. We begin with extracting binary tuples from the Web. These binary tuples consist of a key attribute and a property of the attribute. To validate the truthfulness of the binary tuples, we apply truth-finding algorithms. This helps us in building a credible list of binary tuples. We propose two methods to reconstruct tuples from binary ones. We use the reconstructed tuples to enrich the graph of entities of STEP such that the ``true'' candidates receive more confidence and rank higher in the graph. We show that our approach is efficient and significantly improve the confidence level of the tuples extracted by STEP. We also conduct an experiment on a real-world case of populating a database relation from the Web with our proposed approach.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG, part of Springer Nature 2018.; 23rd International Conference on Database Systems for Advanced Applications, DASFAA 2018 ; Conference date: 21-05-2018 Through 24-05-2018},
}