-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathreferences.bib
251 lines (236 loc) · 10.3 KB
/
references.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
---
---
References
==========
% arXiv preprint 1606.03966; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/AgarwalBCHLLLMO16,
  author        = {Agarwal, Alekh and
                   Bird, Sarah and
                   Cozowicz, Markus and
                   Hoang, Luong and
                   Langford, John and
                   Lee, Stephen and
                   Li, Jiaji and
                   Melamed, Dan and
                   Oshri, Gal and
                   Ribas, Oswaldo and
                   Sen, Siddhartha and
                   Slivkins, Alex},
  title         = {A Multiworld Testing Decision Service},
  journal       = {CoRR},
  volume        = {abs/1606.03966},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1606.03966},
  url           = {https://arxiv.org/abs/1606.03966},
  timestamp     = {Mon, 13 Aug 2018 16:46:15 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/AgarwalBCHLLLMO16},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1003.0146; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/abs-1003-0146,
  author        = {Li, Lihong and
                   Chu, Wei and
                   Langford, John and
                   Schapire, Robert E.},
  title         = {A Contextual-Bandit Approach to Personalized News Article Recommendation},
  journal       = {CoRR},
  volume        = {abs/1003.0146},
  year          = {2010},
  archivePrefix = {arXiv},
  eprint        = {1003.0146},
  url           = {https://arxiv.org/abs/1003.0146},
  timestamp     = {Mon, 13 Aug 2018 16:47:40 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1003-0146},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, Journal of the American Statistical Association 47(260), 1952.
% The page range uses a double hyphen and the ampersand in the publisher name
% is escaped so the entry typesets cleanly under LaTeX.
@article{doi:10.1080/01621459.1952.10483446,
  author    = {Horvitz, D. G. and Thompson, D. J.},
  title     = {A Generalization of Sampling Without Replacement from a Finite Universe},
  journal   = {Journal of the American Statistical Association},
  volume    = {47},
  number    = {260},
  pages     = {663--685},
  year      = {1952},
  publisher = {Taylor \& Francis},
  doi       = {10.1080/01621459.1952.10483446},
  url       = {https://www.tandfonline.com/doi/abs/10.1080/01621459.1952.10483446},
  eprint    = {https://www.tandfonline.com/doi/pdf/10.1080/01621459.1952.10483446}
}
% Conference paper, ICML 2016; record exported from the DBLP bibliography.
% Venue and year are stated inline, so the entry does not depend on a
% separate proceedings record.
@inproceedings{DBLP:conf/icml/JiangL16,
  author    = {Jiang, Nan and
               Li, Lihong},
  title     = {Doubly Robust Off-policy Value Evaluation for Reinforcement Learning},
  booktitle = {Proceedings of the 33rd International Conference on Machine Learning,
               {ICML} 2016, New York City, NY, USA, June 19-24, 2016},
  pages     = {652--661},
  year      = {2016},
  url       = {https://proceedings.mlr.press/v48/jiang16.html},
  timestamp = {Wed, 29 May 2019 08:41:46 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/icml/JiangL16},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% Conference paper, ICML 2011; record exported from the DBLP bibliography.
% Venue and year are stated inline, so the entry does not depend on a
% separate proceedings record. The url field holds the raw address
% (no LaTeX escaping of the underscore).
@inproceedings{DBLP:conf/icml/DudikLL11,
  author    = {Dud{\'{\i}}k, Miroslav and
               Langford, John and
               Li, Lihong},
  title     = {Doubly Robust Policy Evaluation and Learning},
  booktitle = {Proceedings of the 28th International Conference on Machine Learning,
               {ICML} 2011, Bellevue, Washington, USA, June 28 - July 2, 2011},
  pages     = {1097--1104},
  year      = {2011},
  url       = {https://icml.cc/2011/papers/554_icmlpaper.pdf},
  timestamp = {Wed, 03 Apr 2019 17:43:35 +0200},
  biburl    = {https://dblp.org/rec/bib/conf/icml/DudikLL11},
  bibsource = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1802.04064 (version 3, class stat.ML).
% Uses the standard misc entry type and the predefined month macro; the arXiv
% identifier is also given in structured eprint fields, as in the other
% preprint entries of this file.
@misc{bietti2018a,
  author        = {Bietti, Alberto and Agarwal, Alekh and Langford, John},
  title         = {A Contextual Bandit Bake-off},
  howpublished  = {arXiv:1802.04064v3 [stat.ML]},
  year          = {2018},
  month         = dec,
  archivePrefix = {arXiv},
  eprint        = {1802.04064},
  primaryClass  = {stat.ML},
  abstract      = {Contextual bandit algorithms are essential for solving many real-world interactive machine learning problems. Despite multiple recent successes on statistically and computationally efficient methods, the practical behavior of these algorithms is still poorly understood. We leverage the availability of large numbers of supervised learning datasets to compare and empirically optimize contextual bandit algorithms, focusing on practical methods that learn by relying on optimization oracles from supervised learning. We find that a recent method (Foster et al., 2018) using optimism under uncertainty works the best overall. A surprisingly close second is a simple greedy baseline that only explores implicitly through the diversity of contexts, followed by a variant of Online Cover (Agarwal et al., 2014) which tends to be more conservative but robust to problem specification by design. Along the way, we also evaluate and improve several internal components of contextual bandit algorithm design. Overall, this is a thorough study and review of contextual bandit methodology.},
  url           = {https://www.microsoft.com/en-us/research/publication/a-contextual-bandit-bake-off-2/},
}
% Conference paper in the UAI'11 proceedings (AUAI Press).
@inproceedings{Karampatziakis:2011:OIW:3020548.3020594,
  author    = {Karampatziakis, Nikos and
               Langford, John},
  title     = {Online Importance Weight Aware Updates},
  booktitle = {Proceedings of the Twenty-Seventh Conference on Uncertainty in Artificial Intelligence},
  series    = {UAI'11},
  location  = {Barcelona, Spain},
  year      = {2011},
  pages     = {392--399},
  numpages  = {8},
  publisher = {AUAI Press},
  address   = {Arlington, Virginia, United States},
  isbn      = {978-0-9749039-7-2},
  acmid     = {3020594},
  url       = {https://dl.acm.org/citation.cfm?id=3020548.3020594},
}
% arXiv preprint 1507.00300; record exported from the DBLP bibliography.
% The second author is written in "Last, First" form so that "Van Roy" is
% parsed as the family name, and the proper noun in the title is braced so
% sentence-casing styles keep its capital.
@article{DBLP:journals/corr/OsbandR15,
  author        = {Osband, Ian and
                   Van Roy, Benjamin},
  title         = {Bootstrapped {Thompson} Sampling and Deep Exploration},
  journal       = {CoRR},
  volume        = {abs/1507.00300},
  year          = {2015},
  url           = {https://arxiv.org/abs/1507.00300},
  archivePrefix = {arXiv},
  eprint        = {1507.00300},
  timestamp     = {Mon, 13 Aug 2018 16:48:40 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/OsbandR15},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1410.4009; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/EcklesK14,
  author        = {Eckles, Dean and
                   Kaptein, Maurits},
  title         = {Thompson sampling with the online bootstrap},
  journal       = {CoRR},
  volume        = {abs/1410.4009},
  year          = {2014},
  archivePrefix = {arXiv},
  eprint        = {1410.4009},
  url           = {https://arxiv.org/abs/1410.4009},
  timestamp     = {Mon, 13 Aug 2018 16:48:43 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/EcklesK14},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1402.0555; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/AgarwalHKLLS14,
  author        = {Agarwal, Alekh and
                   Hsu, Daniel J. and
                   Kale, Satyen and
                   Langford, John and
                   Li, Lihong and
                   Schapire, Robert E.},
  title         = {Taming the Monster: {A} Fast and Simple Algorithm for Contextual Bandits},
  journal       = {CoRR},
  volume        = {abs/1402.0555},
  year          = {2014},
  archivePrefix = {arXiv},
  eprint        = {1402.0555},
  url           = {https://arxiv.org/abs/1402.0555},
  timestamp     = {Mon, 13 Aug 2018 16:49:15 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/AgarwalHKLLS14},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1811.04383; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/abs-1811-04383,
  author        = {Cortes, David},
  title         = {Adapting multi-armed bandits policies to contextual bandits scenarios},
  journal       = {CoRR},
  volume        = {abs/1811.04383},
  year          = {2018},
  archivePrefix = {arXiv},
  eprint        = {1811.04383},
  url           = {https://arxiv.org/abs/1811.04383},
  timestamp     = {Fri, 23 Nov 2018 12:43:51 +0100},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1811-04383},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% Journal article, volume 10 (December 2009), pages 2615--2637.
% The journal name is stored in full, and the last author's initials are
% space-separated so each one is parsed as its own given-name token.
@article{Shi:2009:HKS:1577069.1755873,
  author     = {Shi, Qinfeng and
                Petterson, James and
                Dror, Gideon and
                Langford, John and
                Smola, Alex and
                Vishwanathan, S. V. N.},
  title      = {Hash Kernels for Structured Data},
  journal    = {Journal of Machine Learning Research},
  issue_date = {12/1/2009},
  volume     = {10},
  month      = dec,
  year       = {2009},
  issn       = {1532-4435},
  pages      = {2615--2637},
  numpages   = {23},
  url        = {https://dl.acm.org/citation.cfm?id=1577069.1755873},
  acmid      = {1755873},
  publisher  = {JMLR.org},
}
% arXiv preprint 0902.2206; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/abs-0902-2206,
  author        = {Weinberger, Kilian Q. and
                   Dasgupta, Anirban and
                   Attenberg, Josh and
                   Langford, John and
                   Smola, Alexander J.},
  title         = {Feature Hashing for Large Scale Multitask Learning},
  journal       = {CoRR},
  volume        = {abs/0902.2206},
  year          = {2009},
  archivePrefix = {arXiv},
  eprint        = {0902.2206},
  url           = {https://arxiv.org/abs/0902.2206},
  timestamp     = {Mon, 13 Aug 2018 16:48:03 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-0902-2206},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1110.4198; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/abs-1110-4198,
  author        = {Agarwal, Alekh and
                   Chapelle, Olivier and
                   Dud{\'{\i}}k, Miroslav and
                   Langford, John},
  title         = {A Reliable Effective Terascale Linear Learning System},
  journal       = {CoRR},
  volume        = {abs/1110.4198},
  year          = {2011},
  archivePrefix = {arXiv},
  eprint        = {1110.4198},
  url           = {https://arxiv.org/abs/1110.4198},
  timestamp     = {Mon, 13 Aug 2018 16:46:53 +0200},
  biburl        = {https://dblp.org/rec/bib/journals/corr/abs-1110-4198},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}
% arXiv preprint 1605.04812; record exported from the DBLP bibliography.
@article{DBLP:journals/corr/SwaminathanKADL16,
  author        = {Swaminathan, Adith and
                   Krishnamurthy, Akshay and
                   Agarwal, Alekh and
                   Dud{\'{\i}}k, Miroslav and
                   Langford, John and
                   Jose, Damien and
                   Zitouni, Imed},
  title         = {Off-policy evaluation for slate recommendation},
  journal       = {CoRR},
  volume        = {abs/1605.04812},
  year          = {2016},
  archivePrefix = {arXiv},
  eprint        = {1605.04812},
  url           = {https://arxiv.org/abs/1605.04812},
  timestamp     = {Mon, 13 Aug 2018 16:48:14 +0200},
  biburl        = {https://dblp.org/rec/journals/corr/SwaminathanKADL16.bib},
  bibsource     = {dblp computer science bibliography, https://dblp.org}
}