Skip to content

Commit

Permalink
update benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
nocotan committed Jul 27, 2021
1 parent edf2164 commit 9154ae1
Show file tree
Hide file tree
Showing 3 changed files with 180 additions and 7 deletions.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
16 changes: 9 additions & 7 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,13 +92,15 @@

#### Importance Weighted Empirical Risk Minimizations

| Models | W=0 | W=5 | W=10 | W=15 | W=20 | W=25 | W=30 | W=35 | W=40 | W=45 | W=50 |
|-------------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|
| Linear Regression | (±) | (±) | (±) | (±) | (±) | (±) | (±)| (±) | (±) | (±) | (±) |
| IWERM (optimal) | (±) | (±) | (±) | (±) | (±) | (±) | (±)| (±)| (±)| (±) | (±) |
| RIWERM (alpha=0.25) |(±) | (±) | (±)| (±) | (±) | (±) | (±) |(±) | (±) | (±) | (±)|
| RIWERM (alpha=0.5) | (±) | (±) | (±)| (±) | (±) | (±) | (±) |(±)| (±) | (±)| (±)|
| RIWERM (alpha=0.75) | (±) | (±) | (±)| (±) | (±) | (±)| (±) |(±)| (±)| (±) | (±) |
| Models | W=7.0x10<sup>4</sup> | W=7.4x10<sup>4</sup> | W=8.0x10<sup>4</sup> | W=8.6x10<sup>4</sup> | W=1.0x10<sup>5</sup> | W=1.1x10<sup>5</sup> | W=1.3x10<sup>5</sup> | W=1.4x10<sup>5</sup> | W=1.6x10<sup>5</sup> |
|-------------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|----------------|
| Linear Regression | 28968(±16869) | 48801(±7800) | 56422(±14803) | 64515(±11119) | 66874(±8715) | 72821(±8722) | 96922(±587)| 136737(±52) | 160761(±41) |
| IWERM (optimal) | 9400(±51) | 9336(±60) | 9250(±87) | 9211(±69) | 9142(±47) | 9096(±43) | 9070(±63)| 43724(±33871)| 152675(±339)|
| RIWERM (alpha=0.25) | 11252(±124) | 11149(±146) | 10981(±200)| 10962(±195) | 10862(±154) | 10960(±157) | 11206(±247) | 39784(±26672) | 152675(±339) |
| RIWERM (alpha=0.5) | 10551(±102) | 10456(±124) | 10306(±178)| 10289(±154) | 10199(±126) | 10247(±124) | 10400(±195) | 41988(±30706)| 152675(±339) |
| RIWERM (alpha=0.75) | 10012(±81) | 9929(±99) | 9803(±147)| 9779(±119) | 9700(±97) | 9709(±93)| 9784(±147) | 43058(±32670)| 152675(±339)|

![](../assets/benchmarks/sumprices_regression_iwerm_target_shift.png)

## Template

Expand Down
171 changes: 171 additions & 0 deletions benchmarks/sumprices_tabular_iwerm_target_shift.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import os
import sys

sys.path.append(os.path.join(os.path.dirname(__file__), ".."))

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RANSACRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import HuberRegressor
from sklearn.linear_model import TheilSenRegressor
from sklearn.metrics import mean_absolute_error as mae
from scipy.stats import norm
from scipy.stats import wasserstein_distance
import tqdm
from shift28m.datasets import SumPricesRegression


# Use seaborn's "whitegrid" theme for the figure produced by this script.
sns.set_style("whitegrid")


# Dataset object; the experiment loop calls `dataset.load_dataset(...)` on it
# to obtain (x_train, y_train), (x_test, y_test) splits under target shift.
dataset = SumPricesRegression()

# Experiment configuration.
#
# n_trials:            number of repetitions (one random seed each) per
#                      (model, shift) pair.
# train_sample_size /
# test_sample_size:    sizes passed to `dataset.load_dataset`.
# test_mu, test_sigma: mean / standard deviation of the normal distribution
#                      that defines the test-side target distribution.
n_trials = 20
train_sample_size = 100000
test_sample_size = 100000
test_mu = 180000
test_sigma = 10000

# Training-side target distributions, ordered from "no shift in the mean"
# (train_mu == test_mu) to the largest shift. The mean goes from 180000 down
# to 20000 in steps of 20000; the standard deviation is fixed.
shifts = [
    {"train_mu": train_mu, "train_sigma": 100000}
    for train_mu in range(180000, 0, -20000)
]

# Estimator / weighting-scheme combinations to benchmark. Every entry uses
# LinearRegression; "sample_weights" doubles as the plot legend label, and its
# first whitespace-separated token selects the weighting scheme in the
# experiment loop. RIWERM entries additionally carry their "alpha" value.
_riwerm_alphas = (0.25, 0.5, 0.75)

models = [
    {"model": LinearRegression, "sample_weights": "ERM"},
    {"model": LinearRegression, "sample_weights": "IWERM (optimal)"},
]
models.extend(
    {
        "model": LinearRegression,
        "sample_weights": rf"RIWERM ($\alpha={riwerm_alpha}$)",
        "alpha": riwerm_alpha,
    }
    for riwerm_alpha in _riwerm_alphas
)

def _sample_weights(weighting, p_te, p_tr, alpha=None, eps=1e-9):
    """Return per-sample training weights for a weighting scheme.

    Args:
        weighting: Label whose first whitespace-separated token names the
            scheme: "ERM", "IWERM", "AIWERM" or "RIWERM".
        p_te: Test-distribution density evaluated at the training targets.
        p_tr: Train-distribution density evaluated at the training targets.
        alpha: Exponent (AIWERM) or mixing coefficient (RIWERM); required for
            those two schemes and ignored otherwise.
        eps: Small constant added to denominators to avoid division by zero.

    Returns:
        An array of weights, or None for plain ERM (unweighted fit).

    Raises:
        ValueError: If the scheme is not one of the four known names.
    """
    scheme = weighting.split()[0]
    if scheme == "ERM":
        return None
    if scheme == "IWERM":
        return p_te / (p_tr + eps)
    if scheme == "AIWERM":
        return (p_te / (p_tr + eps)) ** float(alpha)
    if scheme == "RIWERM":
        alpha = float(alpha)
        return p_te / ((1 - alpha) * p_te + alpha * p_tr + eps)
    raise ValueError(f"unknown weighting scheme: {weighting!r}")


# Empirical 1-Wasserstein distance between the train and test target
# distributions for each shift (x-axis of the final plot). It depends only on
# the shift, so it is estimated once, from a seeded generator, instead of
# being re-drawn from the global RNG for every model: all curves then share
# the same x positions and the values are reproducible across runs.
_dist_rng = np.random.default_rng(0)
_shift_dists = [
    wasserstein_distance(
        _dist_rng.normal(shift["train_mu"], shift["train_sigma"], 10000),
        _dist_rng.normal(test_mu, test_sigma, 10000),
    )
    for shift in shifts
]

# Per-model results; row i corresponds to models[i], column j to shifts[j].
models_errors_mean = []
models_errors_std = []
dists = []

for spec in models:
    model = spec["model"]
    weighting = spec["sample_weights"]
    alpha = spec.get("alpha")

    model_errors_mean = []
    model_errors_std = []
    for shift in shifts:
        errors = []

        for i in tqdm.tqdm(range(n_trials)):
            # The trial index is used as the random seed, so every model sees
            # the same sequence of seeds for a given shift.
            (x_train, y_train), (x_test, y_test) = dataset.load_dataset(
                target_shift=True,
                train_size=train_sample_size,
                test_size=test_sample_size,
                test_mu=test_mu,
                test_sigma=test_sigma,
                train_mu=shift["train_mu"],
                train_sigma=shift["train_sigma"],
                random_seed=i,
            )

            # scikit-learn estimators expect a 2-D feature matrix.
            x_train = x_train.reshape(-1, 1)
            x_test = x_test.reshape(-1, 1)

            # Both densities are evaluated at the *training* targets: the
            # importance weight of a training sample is p_te(y) / p_tr(y).
            p_tr = norm.pdf(
                y_train, loc=shift["train_mu"], scale=shift["train_sigma"]
            ).reshape(-1)
            p_te = norm.pdf(y_train, loc=test_mu, scale=test_sigma).reshape(-1)

            weights = _sample_weights(weighting, p_te, p_tr, alpha=alpha)

            reg = model()
            if weights is None:
                reg.fit(x_train, y_train)
            else:
                reg.fit(x_train, y_train, sample_weight=weights)

            # mean_absolute_error takes (y_true, y_pred).
            errors.append(mae(y_test, reg.predict(x_test)))

        model_errors_mean.append(np.mean(errors))
        model_errors_std.append(np.std(errors))

    # Progress output: the finished model's mean / std error per shift.
    print(model_errors_mean)
    print(model_errors_std)
    models_errors_mean.append(model_errors_mean)
    models_errors_std.append(model_errors_std)
    dists.append(list(_shift_dists))

models_errors_mean = np.array(models_errors_mean)
models_errors_std = np.array(models_errors_std)

# One line colour per entry in `models`, in the same order.
colors = ["purple", "green", "blue", "darkcyan", "red"]

# Dump the raw numbers so they can be copied into the benchmark table.
print(dists)
print(models_errors_mean)
print(models_errors_std)

fig = plt.figure(figsize=(12, 6))
for idx, spec in enumerate(models):
    xs = dists[idx]
    mean_curve = models_errors_mean[idx]
    spread = models_errors_std[idx]
    line_color = colors[idx]

    # Mean MAE as a function of the train/test Wasserstein distance.
    plt.plot(
        xs,
        mean_curve,
        alpha=0.8,
        color=line_color,
        label=spec["sample_weights"],
    )
    # Shade one standard deviation above, then below, the mean curve.
    for band_edge in (mean_curve + spread, mean_curve - spread):
        plt.fill_between(
            xs,
            mean_curve,
            band_edge,
            alpha=0.2,
            color=line_color,
        )

plt.legend()
plt.xlabel(r"$W_1(P_{train}, P_{test})$")
plt.ylabel("MAE")
plt.savefig("sumprices_regression_iwerm_target_shift.png")
plt.show()

0 comments on commit 9154ae1

Please sign in to comment.