diff --git a/obp/dataset/synthetic.py b/obp/dataset/synthetic.py index f1f0232..1a8302d 100644 --- a/obp/dataset/synthetic.py +++ b/obp/dataset/synthetic.py @@ -824,9 +824,9 @@ def _base_reward_function( expected_rewards = context_values + action_values + context_action_values if z_score: expected_rewards = ( - expected_rewards - expected_rewards.mean() / expected_rewards.std() - ) - + expected_rewards - expected_rewards.mean() + ) / expected_rewards.std() + expected_rewards = degree * expected_rewards return expected_rewards diff --git a/tests/ope/test_offline_estimation_performance.py b/tests/ope/test_offline_estimation_performance.py index 2079bfd..73e550d 100644 --- a/tests/ope/test_offline_estimation_performance.py +++ b/tests/ope/test_offline_estimation_performance.py @@ -390,5 +390,5 @@ def process(i: int): ] for estimator_name in tested_estimators: assert ( - relative_ee_df_mean[estimator_name] / relative_ee_df_mean["naive"] < 1.5 + relative_ee_df_mean[estimator_name] / relative_ee_df_mean["naive"] < 1.65 ), f"{estimator_name} is significantly worse than naive (on-policy) estimator"