% Conference paper (ICML 2020) on imputing missing data values with optimal-transport-based losses.
% The hash-like citation key is kept exactly as it was so that existing citations still resolve.
% NOTE(review): month is taken from the conference date recorded in the note field
% (13-07-2020 through 18-07-2020); the previous "1 January" looked like an export placeholder -- confirm.
% NOTE(review): the former series field only repeated booktitle verbatim and was dropped to avoid
% printing the conference name twice; add the real proceedings series/volume here if known.
@inproceedings{7221172374994d779d4153b9b6fd191d,
  author    = {Muzellec, Boris and Josse, Julie and Boyer, Claire and Cuturi, Marco},
  title     = {Missing data imputation using optimal transport},
  booktitle = {37th International Conference on Machine Learning, {ICML} 2020},
  editor    = {Daum{\'e}, III, Hal and Singh, Aarti},
  pages     = {7087--7097},
  publisher = {International Machine Learning Society (IMLS)},
  year      = {2020},
  month     = jul,
  language  = {English},
  abstract  = {Missing data is a crucial issue when applying machine learning algorithms to real-world datasets. Starting from the simple assumption that two batches extracted randomly from the same dataset should share the same distribution, we leverage optimal transport distances to quantify that criterion and turn it into a loss function to impute missing data values. We propose practical methods to minimize these losses using end-to-end learning, that can exploit or not parametric assumptions on the underlying distributions of values. We evaluate our methods on datasets from the UCI repository, in MCAR, MAR and MNAR settings. These experiments show that OT-based methods match or out-perform state-of-the-art imputation methods, even for high percentages of missing values.},
  note      = {Publisher Copyright: {\textcopyright} 2020 37th International Conference on Machine Learning, ICML 2020. All rights reserved.; 37th International Conference on Machine Learning, ICML 2020 ; Conference date: 13-07-2020 Through 18-07-2020},
}