% Conference paper entry; the citation key is kept exactly as exported.
% Author names are stored in "Last, First" form with plain given names:
% the exported escaped-brace wrapper around "Alexander V." printed literal braces.
% The former edition field only repeated the number field and was dropped.
@inproceedings{1ffb765b75534ba481ca981597539fb5,
  author    = {Juditsky, Anatoly and Nazin, Alexander V. and Tsybakov, Alexander and Vayatis, Nicolas},
  title     = {Gap-free bounds for stochastic multi-armed bandit},
  booktitle = {Proceedings of the 17th World Congress, International Federation of Automatic Control, IFAC},
  series    = {IFAC Proceedings Volumes (IFAC-PapersOnline)},
  number    = {1 PART 1},
  year      = {2008},
  month     = dec,
  day       = {1},
  doi       = {10.3182/20080706-5-KR-1001.2585},
  isbn      = {9783902661005},
  language  = {English},
  keywords  = {Learning theory, Randomized methods, Stochastic control},
  abstract  = {We consider the stochastic multi-armed bandit problem with unknown horizon. We present a randomized decision strategy which is based on updating a probability distribution through a stochastic mirror descent/exponentiated gradient type algorithm. We consider separately two assumptions: nonnegative losses or arbitrary losses with an exponential moment condition. We prove optimal (up to logarithmic factors) gap-free bounds on the excess risk of the average over time of the instantaneous losses induced by the choice of a specific action.},
  note      = {17th World Congress, International Federation of Automatic Control, IFAC; Conference date: 06-07-2008 Through 11-07-2008},
}