% Conference paper "Scalable model-based cascaded imputation of missing data"
% (Montiel, Read, Bifet, Abdessalem), pages 64--76 of the PAKDD 2018 proceedings.
% The citation key is kept exactly as exported so existing \cite commands keep working.
% Cleaned relative to the raw export:
%   - editor: removed the literal \{ \} around given names (they typeset as visible braces);
%   - doi: stored bare, without the LaTeX escape on the underscore;
%   - author/editor: all names in the unambiguous "Last, First" form.
% NOTE(review): month/day (jan, 1) look like export placeholder values; the note field
% gives conference dates of 03-06-2018 through 06-06-2018 -- confirm the real
% publication month before relying on it.
@inproceedings{291472ce7d454e60bb474b825a9c99aa,
  author    = {Montiel, Jacob and Read, Jesse and Bifet, Albert and Abdessalem, Talel},
  title     = {Scalable model-based cascaded imputation of missing data},
  booktitle = {Advances in Knowledge Discovery and Data Mining - 22nd Pacific-Asia Conference, PAKDD 2018, Proceedings},
  editor    = {Webb, Geoffrey I. and Phung, Dinh and Ganji, Mohadeseh and Rashidi, Lida and Tseng, Vincent S. and Ho, Bao},
  series    = {Lecture Notes in Computer Science (including subseries Lecture Notes in Artificial Intelligence and Lecture Notes in Bioinformatics)},
  publisher = {Springer Verlag},
  pages     = {64--76},
  year      = {2018},
  month     = jan,
  day       = {1},
  doi       = {10.1007/978-3-319-93040-4_6},
  isbn      = {9783319930398},
  language  = {English},
  keywords  = {Classification, Imputation, Missing data},
  abstract  = {Missing data is a common trait of real-world data that can negatively impact interpretability. In this paper, we present Cascade Imputation (CIM), an effective and scalable technique for automatic imputation of missing data. CIM is not restrictive on the characteristics of the data set, providing support for: Missing At Random and Missing Completely At Random data, numerical and nominal attributes, and large data sets including highly dimensional data sets. We compare CIM against well-established imputation techniques over a variety of data sets under multiple test configurations to measure the impact of imputation on the classification problem. Test results show that CIM outperforms other imputation methods over multiple test conditions. Additionally, we identify optimal performance and failure conditions for popular imputation techniques.},
  note      = {Publisher Copyright: {\textcopyright} Springer International Publishing AG, part of Springer Nature 2018.; 22nd Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, PAKDD 2018 ; Conference date: 03-06-2018 Through 06-06-2018},
}