feat: explore eval target rate (VowpalWabbit#4285)

bassmang · Nov 28, 2022 · f8dacd4 · f8dacd4
1 parent bffd4c2
commit f8dacd4
Show file tree

Hide file tree

Showing 10 changed files with 202 additions and 55 deletions.
diff --git a/cs/unittest/RunTests.tt b/cs/unittest/RunTests.tt
@@ -24,7 +24,7 @@ var skipList = new[] { 13, 32, 39, 258, 40, 259, 41, 260, 59, 60, 61, 66, 68, 90
     25, 26, 349, 350, 356, 357, 358, // crash
     92, 95, 96, 98,	91, 99, 118, 119, 120,
     176, 177, 207, 208, //depend on shell scripts for input/output
-    14, 16, 17, 31, 33, 34,53, 101, 102, 103, 105, 106, 111, 112, 412, 413, 414, // float delta
+    14, 16, 17, 31, 33, 34,53, 101, 102, 103, 105, 106, 111, 112, 412, 413, 414, 423, 424, 425, // float delta
     71, // --examples to test parser
     143, 144, 146, 158, 189, 202, 237, 312, 316, 318, 319, 324, 325, 326, 347, 351, 348, // native json parsing
     149, 152, 156, 193, 194, 217, 385, // bash script

diff --git a/test/core.vwtest.json b/test/core.vwtest.json
@@ -5445,21 +5445,43 @@
   },
   {
     "id": 422,
-    "desc": "explore_eval evaluate same policy as logged policy, all examples should be used to update with default block_size",
+    "desc": "explore_eval evaluate same policy as logged policy, all examples should be used to update with default target_rate of 1",
     "vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --epsilon 0.1 -q UA --explore_eval",
     "diff_files": {
-      "stderr": "train-sets/ref/explore_eval_block_size_1.stderr"
+      "stderr": "train-sets/ref/explore_eval_tr_1.stderr"
     },
     "input_files": [
       "train-sets/explore_eval_egreedy.dat"
     ]
   },
   {
     "id": 423,
-    "desc": "explore_eval evaluate same policy as logged policy, with block_size > 1 there should be at most ((#examples / block_size) + 1) updates",
-    "vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --epsilon 0.1 -q UA --explore_eval --block_size 5",
+    "desc": "explore_eval evaluate same policy as logged policy, with target_rate set there should be around (#examples * target_rate) updates",
+    "vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --epsilon 0.1 -q UA --explore_eval --target_rate 0.2",
     "diff_files": {
-      "stderr": "train-sets/ref/explore_eval_block_size_5.stderr"
+      "stderr": "train-sets/ref/explore_eval_tr_0.2.stderr"
+    },
+    "input_files": [
+      "train-sets/explore_eval_egreedy.dat"
+    ]
+  },
+  {
+    "id": 424,
+    "desc": "explore_eval evaluate same policy as logged policy, with target_rate set there should be around (#examples * target_rate) updates",
+    "vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --squarecb -q UA --explore_eval --target_rate 0.2",
+    "diff_files": {
+      "stderr": "train-sets/ref/explore_eval_sqcb.stderr"
+    },
+    "input_files": [
+      "train-sets/explore_eval_egreedy.dat"
+    ]
+  },
+  {
+    "id": 425,
+    "desc": "explore_eval evaluate same policy as logged policy, with target_rate set there should be around (#examples * target_rate) updates",
+    "vw_command": "--cb_explore_adf -d train-sets/explore_eval_egreedy.dat --coin --large_action_space --max_actions 5 -q UA --explore_eval --target_rate 0.2",
+    "diff_files": {
+      "stderr": "train-sets/ref/explore_eval_las.stderr"
     },
     "input_files": [
       "train-sets/explore_eval_egreedy.dat"

diff --git a/test/train-sets/ref/explore_eval.stderr b/test/train-sets/ref/explore_eval.stderr
@@ -21,5 +21,6 @@ weighted example sum = 3.000000
 weighted label sum = 0.000000
 average loss = 0.333333
 total feature number = 21
-update count = 2
+weighted update count = 2.000000
+average accepted example weight = 1.000000
 final multiplier = 0.555556
diff --git a/test/train-sets/ref/explore_eval_las.stderr b/test/train-sets/ref/explore_eval_las.stderr
@@ -0,0 +1,37 @@
+creating quadratic features for pairs: UA
+Enabling FTRL based optimization
+Algorithm used: Coin Betting
+ftrl_alpha = 4
+ftrl_beta = 1
+using no cache
+Reading datafile = train-sets/explore_eval_egreedy.dat
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+cb_type = mtr
+Enabled reductions: ftrl-Coin Betting, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_large_action_space, cb_explore_adf_greedy, explore_eval, cb_actions_mask, shared_feature_merger
+Input label = CB
+Output pred = ACTION_PROBS
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+0.000000 0.000000            1            1.0          known         0:0.25     1287
+0.000000 0.000000            2            2.0          known         0:0.25     1287
+-0.06859 -0.13719            4            4.0          known         0:0.25     1287
+-0.29839 -0.52820            8            8.0          known         8:0.96     1287
+-0.54535 -0.79230           16           16.0          known         8:0.96     1287
+-0.53677 -0.52820           32           32.0          known         8:0.96     1287
+
+finished run
+number of examples = 50
+weighted example sum = 50.000000
+weighted label sum = 0.000000
+average loss = -0.408512
+total feature number = 63450
+weighted update count = 11.250000
+average accepted example weight = 1.125000
+violation count = 33
+final multiplier = 1.000000
+targeted update count = 10.000000
+final rate = 0.266256
diff --git a/test/train-sets/ref/explore_eval_sqcb.stderr b/test/train-sets/ref/explore_eval_sqcb.stderr
@@ -0,0 +1,37 @@
+creating quadratic features for pairs: UA
+Enabling FTRL based optimization
+Algorithm used: Coin Betting
+ftrl_alpha = 4
+ftrl_beta = 1
+using no cache
+Reading datafile = train-sets/explore_eval_egreedy.dat
+num sources = 1
+Num weight bits = 18
+learning rate = 0.5
+initial_t = 0
+power_t = 0.5
+cb_type = mtr
+Enabled reductions: ftrl-Coin Betting, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_squarecb, explore_eval, shared_feature_merger
+Input label = CB
+Output pred = ACTION_PROBS
+average  since         example        example        current        current  current
+loss     last          counter         weight          label        predict features
+0.000000 0.000000            1            1.0          known         0:0.11     1287
+-0.50000 -1.00000            2            2.0          known         0:0.11     1287
+-0.32483 -0.14966            4            4.0          known         8:0.27     1287
+-0.25223 -0.17962            8            8.0          known         8:0.27     1287
+-0.26083 -0.26944           16           16.0          known         8:0.38     1287
+-0.20807 -0.15530           32           32.0          known          8:0.4     1287
+
+finished run
+number of examples = 50
+weighted example sum = 50.000000
+weighted label sum = 0.000000
+average loss = -0.390862
+total feature number = 63450
+weighted update count = 16.413322
+average accepted example weight = 1.492120
+violation count = 4
+final multiplier = 1.000000
+targeted update count = 10.000000
+final rate = 0.230407
diff --git a/test/train-sets/ref/explore_eval_block_size_5.stderr b/test/train-sets/ref/explore_eval_tr_0.2.stderr
@@ -18,17 +18,19 @@ average  since         example        example        current        current  cur
 loss     last          counter         weight          label        predict features
 0.000000 0.000000            1            1.0          known         0:0.11     1287
 -0.50000 -1.00000            2            2.0          known         0:0.11     1287
--0.28048 -0.06097            4            4.0          known         0:0.11     1287
--0.17073 -0.06097            8            8.0          known         0:0.11     1287
--0.13109 -0.09146           16           16.0          known         0:0.11     1287
--0.26067 -0.39024           32           32.0          known         8:0.91     1287
+-0.50000 -0.50000            4            4.0          known         8:0.91     1287
+-0.50000 -0.50000            8            8.0          known         8:0.91     1287
+-0.62500 -0.75000           16           16.0          known         8:0.91     1287
+-0.43902 -0.25304           32           32.0          known         8:0.91     1287
 
 finished run
 number of examples = 50
 weighted example sum = 50.000000
 weighted label sum = 0.000000
-average loss = -0.367073
+average loss = -0.303415
 total feature number = 63450
-update count = 7
-final multiplier = 0.100000
-targeted update count = 10
+weighted update count = 9.000000
+average accepted example weight = 1.000000
+final multiplier = 1.000000
+targeted update count = 10.000000
+final rate = 0.346828
diff --git a/test/train-sets/ref/explore_eval_block_size_1.stderr b/test/train-sets/ref/explore_eval_tr_1.stderr
@@ -29,5 +29,6 @@ weighted example sum = 50.000000
 weighted label sum = 0.000000
 average loss = -0.580000
 total feature number = 63450
-update count = 50
+weighted update count = 50.000000
+average accepted example weight = 1.000000
 final multiplier = 1.000000
diff --git a/test/train-sets/ref/explore_eval_with_empty_shared_feature.stderr b/test/train-sets/ref/explore_eval_with_empty_shared_feature.stderr
@@ -21,5 +21,6 @@ weighted example sum = 3.000000
 weighted label sum = 0.000000
 average loss = 0.333333
 total feature number = 21
-update count = 2
+weighted update count = 2.000000
+average accepted example weight = 1.000000
 final multiplier = 0.555556
diff --git a/test/train-sets/ref/help.stdout b/test/train-sets/ref/help.stdout
@@ -618,11 +618,9 @@ Weight Options:
     --explore_eval                          Evaluate explore_eval adf policies (type: bool, keep, necessary)
     --multiplier arg                        Multiplier used to make all rejection sample probabilities <=
                                             1 (type: float)
-    --block_size arg                        The examples will be processed in blocks of block_size. If an
-                                            example update is found in that block no other examples in the
-                                            block will be used to update the policy. If an example is not
-                                            used in the block then the quota rolls over and the next block
-                                            can update more than one examples (type: uint, default: 1)
+    --target_rate arg                       The target rate will be used to adjust the rejection rate in
+                                            order to achieve an update count of #examples * target_rate (type:
+                                            float)
 [Reduction] Follow the Regularized Leader - Coin Options:
     --coin                                  Coin betting optimizer (type: bool, keep, necessary)
     --ftrl_alpha arg                        Learning rate for FTRL optimization (type: float)