Commit db890d45 authored by Petteri Pulkkinen

Update references


Signed-off-by: Petteri Pulkkinen <petteri.pulkkinen@aalto.fi>
parent 53d8aa06
@@ -2478,6 +2478,23 @@ The approach is based on the following:
url = {https://doi.org/10.1145/3055399.3055403},
}
@InProceedings{Flaxman2005,
author = {Flaxman, Abraham D. and Kalai, Adam Tauman and McMahan, H. Brendan},
booktitle = {Proceedings of the Sixteenth Annual ACM-SIAM Symposium on Discrete Algorithms},
  title     = {Online convex optimization in the bandit setting: {Gradient} descent without a gradient},
year = {2005},
address = {USA},
  pages     = {385--394},
publisher = {Society for Industrial and Applied Mathematics},
series = {SODA '05},
  abstract  = {We study a general online convex optimization problem. We have a convex set S and an unknown sequence of cost functions c_1, c_2, ..., and in each period, we choose a feasible point x_t in S, and learn the cost c_t(x_t). If the function c_t is also revealed after each period then, as Zinkevich shows in [25], gradient descent can be used on these functions to get regret bounds of O(√n). That is, after n rounds, the total cost incurred will be O(√n) more than the cost of the best single feasible decision chosen with the benefit of hindsight, min_x Σ c_t(x). We extend this to the "bandit" setting, where, in each period, only the cost c_t(x_t) is revealed, and bound the expected regret as O(n^{3/4}). Our approach uses a simple approximation of the gradient that is computed from evaluating c_t at a single (random) point. We show that this biased estimate is sufficient to approximate gradient descent on the sequence of functions. In other words, it is possible to use gradient descent without seeing anything more than the value of the functions at a single point. The guarantees hold even in the most general case: online against an adaptive adversary. For the online linear optimization problem [15], algorithms with low regrets in the bandit setting have recently been given against oblivious [1] and adaptive adversaries [19]. In contrast to these algorithms, which distinguish between explicit explore and exploit periods, our algorithm can be interpreted as doing a small amount of exploration in each period.},
comment = {Seminal paper providing gradient descent based BCO algorithm},
groups = {Bandit convex optimization},
isbn = {0898715857},
location = {Vancouver, British Columbia},
numpages = {10},
}
@Comment{jabref-meta: databaseType:bibtex;}
@Comment{jabref-meta: grouping: