Skip to content

Commit

Permalink
feat: explore eval target rate (VowpalWabbit#4285)
Browse files Browse the repository at this point in the history
  • Loading branch information
olgavrou authored Nov 28, 2022
1 parent bffd4c2 commit f8dacd4
Show file tree
Hide file tree
Showing 10 changed files with 202 additions and 55 deletions.
2 changes: 1 addition & 1 deletion cs/unittest/RunTests.tt
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ var skipList = new[] { 13, 32, 39, 258, 40, 259, 41, 260, 59, 60, 61, 66, 68, 90
25, 26, 349, 350, 356, 357, 358, // crash
92, 95, 96, 98, 91, 99, 118, 119, 120,
176, 177, 207, 208, //depend on shell scripts for input/output
14, 16, 17, 31, 33, 34,53, 101, 102, 103, 105, 106, 111, 112, 412, 413, 414, // float delta
14, 16, 17, 31, 33, 34,53, 101, 102, 103, 105, 106, 111, 112, 412, 413, 414, 423, 424, 425, // float delta
71, // --examples to test parser
143, 144, 146, 158, 189, 202, 237, 312, 316, 318, 319, 324, 325, 326, 347, 351, 348, // native json parsing
149, 152, 156, 193, 194, 217, 385, // bash script
Expand Down
32 changes: 27 additions & 5 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -5445,21 +5445,43 @@
},
{
"id": 422,
"desc": "explore_eval evaluate same policy as logged policy, all examples should be used to update with default block_size",
"desc": "explore_eval evaluate same policy as logged policy, all examples should be used to update with default target_rate of 1",
"vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --epsilon 0.1 -q UA --explore_eval",
"diff_files": {
"stderr": "train-sets/ref/explore_eval_block_size_1.stderr"
"stderr": "train-sets/ref/explore_eval_tr_1.stderr"
},
"input_files": [
"train-sets/explore_eval_egreedy.dat"
]
},
{
"id": 423,
"desc": "explore_eval evaluate same policy as logged policy, with block_size > 1 there should be at most ((#examples / block_size) + 1) updates",
"vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --epsilon 0.1 -q UA --explore_eval --block_size 5",
"desc": "explore_eval evaluate same policy as logged policy, target_rate set there should be around ((#examples * target_rate) updates",
"vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --epsilon 0.1 -q UA --explore_eval --target_rate 0.2",
"diff_files": {
"stderr": "train-sets/ref/explore_eval_block_size_5.stderr"
"stderr": "train-sets/ref/explore_eval_tr_0.2.stderr"
},
"input_files": [
"train-sets/explore_eval_egreedy.dat"
]
},
{
"id": 424,
"desc": "explore_eval evaluate same policy as logged policy, target_rate set there should be around ((#examples * target_rate) updates",
"vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --squarecb -q UA --explore_eval --target_rate 0.2",
"diff_files": {
"stderr": "train-sets/ref/explore_eval_sqcb.stderr"
},
"input_files": [
"train-sets/explore_eval_egreedy.dat"
]
},
{
"id": 425,
"desc": "explore_eval evaluate same policy as logged policy, target_rate set there should be around ((#examples * target_rate) updates",
"vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --large_action_space --max_actions 5 -q UA --explore_eval --target_rate 0.2",
"diff_files": {
"stderr": "train-sets/ref/explore_eval_las.stderr"
},
"input_files": [
"train-sets/explore_eval_egreedy.dat"
Expand Down
3 changes: 2 additions & 1 deletion test/train-sets/ref/explore_eval.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.333333
total feature number = 21
update count = 2
weighted update count = 2.000000
average accepted example weight = 1.000000
final multiplier = 0.555556
37 changes: 37 additions & 0 deletions test/train-sets/ref/explore_eval_las.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
creating quadratic features for pairs: UA
Enabling FTRL based optimization
Algorithm used: Coin Betting
ftrl_alpha = 4
ftrl_beta = 1
using no cache
Reading datafile = train-sets/explore_eval_egreedy.dat
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled reductions: ftrl-Coin Betting, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_large_action_space, cb_explore_adf_greedy, explore_eval, cb_actions_mask, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 known 0:0.25 1287
0.000000 0.000000 2 2.0 known 0:0.25 1287
-0.06859 -0.13719 4 4.0 known 0:0.25 1287
-0.29839 -0.52820 8 8.0 known 8:0.96 1287
-0.54535 -0.79230 16 16.0 known 8:0.96 1287
-0.53677 -0.52820 32 32.0 known 8:0.96 1287

finished run
number of examples = 50
weighted example sum = 50.000000
weighted label sum = 0.000000
average loss = -0.408512
total feature number = 63450
weighted update count = 11.250000
average accepted example weight = 1.125000
violation count = 33
final multiplier = 1.000000
targeted update count = 10.000000
final rate = 0.266256
37 changes: 37 additions & 0 deletions test/train-sets/ref/explore_eval_sqcb.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
creating quadratic features for pairs: UA
Enabling FTRL based optimization
Algorithm used: Coin Betting
ftrl_alpha = 4
ftrl_beta = 1
using no cache
Reading datafile = train-sets/explore_eval_egreedy.dat
num sources = 1
Num weight bits = 18
learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled reductions: ftrl-Coin Betting, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_squarecb, explore_eval, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.000000 0.000000 1 1.0 known 0:0.11 1287
-0.50000 -1.00000 2 2.0 known 0:0.11 1287
-0.32483 -0.14966 4 4.0 known 8:0.27 1287
-0.25223 -0.17962 8 8.0 known 8:0.27 1287
-0.26083 -0.26944 16 16.0 known 8:0.38 1287
-0.20807 -0.15530 32 32.0 known 8:0.4 1287

finished run
number of examples = 50
weighted example sum = 50.000000
weighted label sum = 0.000000
average loss = -0.390862
total feature number = 63450
weighted update count = 16.413322
average accepted example weight = 1.492120
violation count = 4
final multiplier = 1.000000
targeted update count = 10.000000
final rate = 0.230407
Original file line number Diff line number Diff line change
Expand Up @@ -18,17 +18,19 @@ average since example example current current cur
loss last counter weight label predict features
0.000000 0.000000 1 1.0 known 0:0.11 1287
-0.50000 -1.00000 2 2.0 known 0:0.11 1287
-0.28048 -0.06097 4 4.0 known 0:0.11 1287
-0.17073 -0.06097 8 8.0 known 0:0.11 1287
-0.13109 -0.09146 16 16.0 known 0:0.11 1287
-0.26067 -0.39024 32 32.0 known 8:0.91 1287
-0.50000 -0.50000 4 4.0 known 8:0.91 1287
-0.50000 -0.50000 8 8.0 known 8:0.91 1287
-0.62500 -0.75000 16 16.0 known 8:0.91 1287
-0.43902 -0.25304 32 32.0 known 8:0.91 1287

finished run
number of examples = 50
weighted example sum = 50.000000
weighted label sum = 0.000000
average loss = -0.367073
average loss = -0.303415
total feature number = 63450
update count = 7
final multiplier = 0.100000
targeted update count = 10
weighted update count = 9.000000
average accepted example weight = 1.000000
final multiplier = 1.000000
targeted update count = 10.000000
final rate = 0.346828
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,6 @@ weighted example sum = 50.000000
weighted label sum = 0.000000
average loss = -0.580000
total feature number = 63450
update count = 50
weighted update count = 50.000000
average accepted example weight = 1.000000
final multiplier = 1.000000
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.333333
total feature number = 21
update count = 2
weighted update count = 2.000000
average accepted example weight = 1.000000
final multiplier = 0.555556
8 changes: 3 additions & 5 deletions test/train-sets/ref/help.stdout
Original file line number Diff line number Diff line change
Expand Up @@ -618,11 +618,9 @@ Weight Options:
--explore_eval Evaluate explore_eval adf policies (type: bool, keep, necessary)
--multiplier arg Multiplier used to make all rejection sample probabilities <=
1 (type: float)
--block_size arg The examples will be processed in blocks of block_size. If an
example update is found in that block no other examples in the
block will be used to update the policy. If an example is not
used in the block then the quota rolls over and the next block
can update more than one examples (type: uint, default: 1)
--target_rate arg The target rate will be used to adjust the rejection rate in
order to achieve an update count of #examples * target_rate (type:
float)
[Reduction] Follow the Regularized Leader - Coin Options:
--coin Coin betting optimizer (type: bool, keep, necessary)
--ftrl_alpha arg Learning rate for FTRL optimization (type: float)
Expand Down
Loading

0 comments on commit f8dacd4

Please sign in to comment.