% Conference paper in the proceedings of the 2014 IEEE International Conference on Big Data.
% Review notes:
%   - month follows the conference date recorded in the note field (27--30 Oct 2014);
%     the previous value (1 January) looked like an export placeholder.
%   - NOTE(review): author names may have lost diacritics during export; confirm against the DOI record.
%   - NOTE(review): series repeats booktitle almost verbatim; confirm whether it should be kept.
@inproceedings{217d385b893c4d23aa87ea6c0c07102f,
  author    = {Clemencon, Stephan and Bertail, Patrice and Chautru, Emilie},
  title     = {Scaling up {M-estimation} via sampling designs: The {Horvitz-Thompson} stochastic gradient descent},
  booktitle = {Proceedings - 2014 IEEE International Conference on Big Data, Big Data 2014},
  editor    = {Lin, Jimmy and Pei, Jian and Hu, Xiaohua Tony and Chang, Wo and Nambiar, Raghunath and Aggarwal, Charu and Cercone, Nick and Honavar, Vasant and Huan, Jun and Mobasher, Bamshad and Pyne, Saumyadipta},
  series    = {Proceedings - 2014 IEEE International Conference on Big Data, IEEE Big Data 2014},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  pages     = {25--30},
  year      = {2014},
  month     = oct,
  doi       = {10.1109/BigData.2014.7004208},
  language  = {English},
  keywords  = {Horvitz-Thompson estimation, sampling design, statistical learning, stochastic gradient descent, survey},
  abstract  = {In certain situations that shall be undoubtedly more and more common in the Big Data era, the datasets available are so massive that computing statistics over the full sample is hardly feasible, if not unfeasible. A natural approach in this context consists in using survey schemes and substituting the 'full data' statistics with their counterparts based on the resulting random samples, of manageable size. It is the purpose of this paper to investigate the impact of survey sampling with unequal inclusion probabilities on (stochastic) gradient descent-based M-estimation methods in large-scale statistical-learning problems. We prove that, in presence of some a priori information, one may significantly reduce the number of terms that must be averaged to estimate the gradient at each step with overwhelming probability, while preserving the asymptotic accuracy. These striking results are described here by limit theorems.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE.; 2nd IEEE International Conference on Big Data, Big Data 2014 ; Conference date: 27-10-2014 Through 30-10-2014},
}