Skip to content

Commit

Permalink
fix: explore_eval don't learn if logged action not in predicted actions (#4262)
Browse files Browse the repository at this point in the history
  • Loading branch information
olgavrou authored Nov 8, 2022
1 parent e3685a0 commit 0406c0f
Showing 1 changed file with 12 additions and 1 deletion.
13 changes: 12 additions & 1 deletion vowpalwabbit/core/src/reductions/explore_eval.cc
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,7 @@ void do_actual_learning(explore_eval& data, multi_learner& base, VW::multi_ex& e
data.action_label = std::move(label_example->l.cb);
label_example->l.cb = std::move(data.empty_label);
}

multiline_learn_or_predict<false>(base, ec_seq, data.offset);

if (label_example != nullptr) // restore label
Expand All @@ -159,11 +160,18 @@ void do_actual_learning(explore_eval& data, multi_learner& base, VW::multi_ex& e
VW::action_scores& a_s = ec_seq[0]->pred.a_s;

float action_probability = 0;
bool action_found = false;
for (size_t i = 0; i < a_s.size(); i++)
{
if (data.known_cost.action == a_s[i].action) { action_probability = a_s[i].score; }
if (data.known_cost.action == a_s[i].action)
{
action_probability = a_s[i].score;
action_found = true;
}
}

if (!action_found) { return; }

float threshold = action_probability / data.known_cost.probability;

if (!data.fixed_multiplier) { data.multiplier = std::min(data.multiplier, 1 / threshold); }
Expand All @@ -183,15 +191,18 @@ void do_actual_learning(explore_eval& data, multi_learner& base, VW::multi_ex& e
{ ec_found = ec; }
if (threshold > 1) { ec->weight *= threshold; }
}

ec_found->l.cb.costs[0].probability = action_probability;

multiline_learn_or_predict<true>(base, ec_seq, data.offset);

// restore logged example
if (threshold > 1)
{
float inv_threshold = 1.f / threshold;
for (auto& ec : ec_seq) { ec->weight *= inv_threshold; }
}

ec_found->l.cb.costs[0].probability = data.known_cost.probability;
data.update_count++;
}
Expand Down

0 comments on commit 0406c0f

Please sign in to comment.