Commit 7299460 (initial commit, no parents): 31 changed files, 858 additions, 0 deletions.
.gitignore

__pycache__
*.swp
*.bak
config.py

import copy
import json


class Config:
    def __init__(self, base_path=''):
        with open(base_path + 'simulations.json', 'r') as f:
            self.cfg = json.load(f)

    def _update_cfg(self, base_cfg, new_cfg):
        # We support one level of nesting for now: each top-level section
        # of new_cfg updates the matching section of base_cfg in place.
        for k in new_cfg.keys():
            if k in ('base_cfg', 'desc', 'dataset_id'):
                continue
            base_cfg[k].update(new_cfg[k])
        base_cfg['dataset_id'] = new_cfg['dataset_id']

    def get_cfg(self, cfg_id):
        if cfg_id not in self.cfg:
            raise KeyError(
                'Key %s does not exist in simulations.json.' % cfg_id
            )

        # Deep-copy config "1" so repeated get_cfg() calls do not mutate
        # the stored defaults through the in-place updates below.
        initial_base_cfg = copy.deepcopy(self.cfg["1"])
        base_cfg = self.cfg[self.cfg[cfg_id].get('base_cfg', cfg_id)]
        # All base configs are based on config "1".
        # This enables backwards compatibility when new options are added.
        self._update_cfg(initial_base_cfg, base_cfg)

        cfg = copy.deepcopy(initial_base_cfg)
        self._update_cfg(cfg, self.cfg[cfg_id])

        cfg['cfg_id'] = cfg_id
        return cfg


if __name__ == '__main__':
    cfg = Config()
    print('\n'.join(cfg.cfg.keys()))
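The loader above expects a simulations.json next to the code, which is not visible in this diff. The following is a hypothetical sketch built only from the keys the code actually reads (problems, model, base_cfg, desc, dataset_id); all values are invented:

```json
{
  "1": {
    "desc": "Base config; every other config inherits from it.",
    "dataset_id": "1",
    "problems": {"input_size": 2, "high2": 100, "n_problems": 10000, "log_norm": true},
    "model": {
      "fc_sizes": [64, 64],
      "activation": "ReLU",
      "optimizer": "Adam",
      "lr": 0.001,
      "optimizer_sgd_momentum": 0.9,
      "batch_size": 32,
      "n_epochs": 100,
      "train_eval_split": 0.8,
      "shuffle_data": true
    }
  },
  "2": {
    "desc": "Like config 1, but trained with SGD.",
    "base_cfg": "1",
    "dataset_id": "2",
    "model": {"optimizer": "sgd", "lr": 0.01}
  }
}
```

With this layout, get_cfg("2") starts from a deep copy of "1", overlays the section updates from "2", and returns a config whose model section has optimizer "sgd" and lr 0.01 while everything else keeps the defaults.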
(10 binary files not shown.)
logger.py

import json

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors

from torch.utils.tensorboard import SummaryWriter


# Rebuild the font cache so the Raleway font is picked up. Note that
# font_manager._rebuild() is a private API and was removed in newer
# matplotlib releases, so guard the call.
try:
    mpl.font_manager._rebuild()
except AttributeError:
    pass
plt.rc('font', family='Raleway')


def truncate_colormap(cmapIn='jet', minval=0.0, maxval=1.0, n=100):
    # Build a new colormap from the [minval, maxval] slice of cmapIn.
    cmapIn = plt.get_cmap(cmapIn)

    new_cmap = colors.LinearSegmentedColormap.from_list(
        'trunc({n},{a:.2f},{b:.2f})'.format(n=cmapIn.name, a=minval, b=maxval),
        cmapIn(np.linspace(minval, maxval, n)))

    return new_cmap
class Logger:
    def __init__(self, model_fname, cfg):
        self.cfg = cfg
        self.model_fname = model_fname
        # One TensorBoard run directory per model file name.
        self.writer = SummaryWriter(log_dir='runs/' + model_fname)

    def log_config(self):
        self.writer.add_text('Info/Config', json.dumps(self.cfg), 0)

    def log_train(self, data, n_iter):
        self.writer.add_scalar('Loss/Train', data['loss_train'], n_iter)

    def log_eval(self, data, n_iter):
        self.writer.add_scalar('Loss/Eval', data['loss_eval'], n_iter)
        for k, v in data['Problem_Misclassifications'].items():
            self.writer.add_scalar(
                'Problem_Misclassifications/' + k,
                v,
                n_iter
            )
        self.writer.add_scalar(
            'Total_Misclassifications',
            data['Total_Misclassifications'],
            n_iter
        )

    def log_eval_reverse(self, data, n_iter):
        self.writer.add_scalar('Loss/Eval', data['loss_eval'], n_iter)

    def log_custom_reverse_kpi(self, kpi, data, n_iter):
        self.writer.add_scalar('Custom/' + kpi, data, n_iter)

    def close(self):
        self.writer.close()
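A minimal sketch of the Logger in use, mirroring what the training script does below; the run name and loss values here are invented for illustration:

```python
from config import Config
from logger import Logger

cfg = Config().get_cfg("1")   # assumes a "1" entry exists in simulations.json
logger = Logger("demo-run", cfg)
logger.log_config()
logger.log_train({'loss_train': 0.42}, n_iter=0)
logger.log_eval_reverse({'loss_eval': 0.37}, n_iter=0)
logger.log_custom_reverse_kpi("R2", 0.91, n_iter=0)
logger.close()
# Inspect with: tensorboard --logdir runs
```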
main.py

import pickle
import random
import datetime
import argparse

import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

from config import Config
from logger import Logger
from models import RegressionOptimizer


def get_model_fname(cfg):
    # Unique-ish run name: timestamp, random suffix, config id. Note that
    # the ':' characters in the timestamp are not valid in Windows file
    # names.
    rand_str = str(int(random.random() * 9e6))
    model_fname = "-".join([
        datetime.datetime.now().strftime("%y-%m-%d_%H:%M:%S"),
        rand_str,
        cfg['cfg_id']
    ])
    return model_fname
def setup_data_loader(cfg, data):
    X = data[0]
    y = data[1]

    tensor_x = torch.Tensor(X)
    tensor_y = torch.Tensor(y)

    dataset = TensorDataset(tensor_x, tensor_y)

    # Split into train/eval sets according to the configured ratio.
    batch_size = cfg['model']['batch_size']
    train_eval_split = cfg['model']['train_eval_split']
    train_size = int(train_eval_split * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size]
    )
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=cfg['model']['shuffle_data'],
        num_workers=0
    )
    test_data_loader = DataLoader(
        test_dataset,
        batch_size=1,
        num_workers=0
    )

    return train_data_loader, test_data_loader
def run_experiment(cfg_id, n_runs=1):
    cfg = Config().get_cfg(cfg_id)
    # The file is pickled despite the .npy suffix; see gen_data below.
    with open("datasets/" + cfg['dataset_id'] + "data.npy", "rb") as f:
        data = pickle.load(f)

    train_data_loader, test_data_loader = setup_data_loader(cfg, data)

    for _ in range(n_runs):
        model_fname = get_model_fname(cfg)
        logger = Logger(model_fname, cfg)
        logger.log_config()
        optimizer = RegressionOptimizer(
            cfg, train_data_loader, test_data_loader, logger
        )
        optimizer.train()
        logger.close()


def gen_data(cfg_id):
    cfg = Config().get_cfg(cfg_id)
    high2 = cfg['problems']['high2']
    n = cfg['problems']['n_problems']

    # Random multiplication problems: each row of X is a factor pair and
    # the target is its product. Duplicate pairs are removed.
    X = np.random.randint(1, high2, size=(n, 2))
    X = np.unique(X, axis=0)
    y = X[:, 0] * X[:, 1]

    # TODO: Do this better..
    X = X.astype(float)
    y = y.astype(float)

    if cfg['problems']['log_norm']:
        # In log space the task becomes linear: log(a*b) = log(a) + log(b).
        X = np.log(X)
        y = np.log(y)

        max_val = max(X.max(), y.max())
        print(max_val)
        X /= max_val
        y /= max_val
    else:
        X /= high2
        y /= high2 * 10.

    print(X)
    print(y)

    # Note: written under cfg_id, while run_experiment reads under
    # dataset_id, so the two must line up in simulations.json.
    with open("datasets/" + cfg['cfg_id'] + "data.npy", "wb") as f:
        pickle.dump((X, y), f)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--gendata", action="store_true")
    parser.add_argument("--train", action="store_true")
    parser.add_argument("-c", "--cfg_id", help="cfg_id")
    parser.add_argument("-n", "--nruns", nargs="?", type=int, default=1)
    args = parser.parse_args()

    if args.gendata:
        gen_data(args.cfg_id)
    elif args.train:
        run_experiment(args.cfg_id, args.nruns)
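Putting it together, a plausible way to drive the script. This assumes the entry point is named main.py (the file names in this digest are reconstructed from the import statements), that simulations.json defines a config "2" whose dataset_id is also "2" (so gen_data's output path matches what run_experiment loads), and that the datasets/ and runs/ directories exist:

```
# Generate the multiplication dataset for config 2, then train 3 runs on it.
python main.py --gendata -c 2
python main.py --train -c 2 -n 3
tensorboard --logdir runs   # inspect Loss/Train, Loss/Eval, Custom/R2
```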
models.py

import sys

import torch
import torch.nn as nn
import torch.optim as optim


class ReverseFCNet(nn.Module):
    def __init__(self, cfg, output_size):
        super(ReverseFCNet, self).__init__()
        input_size = cfg['problems']['input_size']

        activation_type = cfg['model']['activation']
        if activation_type == "ReLU":
            activation_cls = nn.ReLU
        elif activation_type == "ELU":
            activation_cls = nn.ELU
        elif activation_type == "LeakyReLU":
            activation_cls = nn.LeakyReLU
        else:
            raise ValueError('Unknown activation: %s' % activation_type)

        fc_sizes = cfg['model']['fc_sizes'] + [output_size]

        # Alternate Linear and activation layers, then drop the trailing
        # activation so the network ends in a linear output layer.
        net = []
        last_fc_size = input_size
        for size in fc_sizes:
            net.append(nn.Linear(last_fc_size, size))
            net.append(activation_cls())
            last_fc_size = size

        net.pop(-1)
        self.fc_net = nn.Sequential(*net)
        print(self.fc_net)

    def forward(self, x):
        return self.fc_net(x)
class RegressionOptimizer:
    def __init__(self, cfg, train_data_loader, test_data_loader, logger):
        self.cfg = cfg
        self.train_data_loader = train_data_loader
        self.test_data_loader = test_data_loader
        self.logger = logger

        # Load cfg variables.
        lr = cfg['model']['lr']
        sgd_momentum = cfg['model']['optimizer_sgd_momentum']
        self.batch_size = cfg['model']['batch_size']
        self.n_epochs = cfg['model']['n_epochs']
        self.train_eval_split = cfg['model']['train_eval_split']

        # Set it all up.
        # TODO: The output size of 1 is hardcoded.
        self.net = ReverseFCNet(cfg, 1)
        self.criterion = nn.MSELoss()
        if cfg['model']['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(
                self.net.parameters(), lr=lr, momentum=sgd_momentum
            )
        elif cfg['model']['optimizer'] == 'Adam':
            self.optimizer = optim.Adam(
                self.net.parameters(), lr=lr
            )
        else:
            raise ValueError(
                'Unknown optimizer: %s' % cfg['model']['optimizer']
            )
    def train(self):
        self.net.train()
        data_len = len(self.train_data_loader)
        for epoch in range(self.n_epochs):
            batch_loss = 0.
            for i, data in enumerate(self.train_data_loader):
                inputs, labels = data

                self.optimizer.zero_grad()

                outputs = self.net(inputs.float())

                # Squeeze the (batch, 1) outputs to match the (batch,)
                # labels instead of relying on broadcasting.
                loss = self.criterion(outputs.squeeze(1), labels.float())
                loss.backward()
                self.optimizer.step()

                # TODO!!! What if batch_size is not a factor of total size.
                # Then the last term will be wrong.
                batch_loss += loss.item() * self.batch_size
                if i % 1000 == 0:
                    avg_loss = batch_loss / (i + 1)
                    msg = '[%d, %5d] loss: %.3f' % (epoch + 1, i, avg_loss)
                    sys.stdout.write('\r' + msg)
                    sys.stdout.flush()

                    data = {
                        "loss_train": batch_loss / (i + 1)
                    }
                    self.logger.log_train(data, data_len * epoch + i)

            # Evaluate once per epoch.
            self.net.eval()
            data = {}
            test_loss = self.eval(epoch, do_print=False, debug=epoch % 10 == 0)
            data['loss_eval'] = test_loss
            self.logger.log_eval_reverse(data, epoch)
            self.net.train()
            print('')
    def eval(self, epoch, do_print=True, debug=False):
        # Track the sum of squared errors (SSE) and the mean label so we
        # can report R2 = 1 - SSE/SSM over the eval set.
        sse = 0
        ssm_mean = None
        n = 0

        self.net.eval()
        total_loss = 0.0
        for i, data in enumerate(self.test_data_loader):
            inputs, labels = data
            outputs = self.net(inputs.float())

            loss = self.criterion(outputs.squeeze(1), labels.float())

            sse += ((labels.numpy() - outputs[0].detach().numpy()) ** 2).sum()
            if ssm_mean is None:
                # Copy so later in-place adds don't alias the first batch.
                ssm_mean = labels.numpy().copy()
            else:
                ssm_mean += labels.numpy()
            n += 1

            total_loss += loss.item()
            if do_print and i % 1000 == 0:
                msg = '[%d] loss: %.3f' % (i, total_loss / (i + 1))
                sys.stdout.write('\r' + msg)
                sys.stdout.flush()

        # Second pass: total sum of squares around the mean label.
        ssm_mean /= n
        ssm = 0
        for i, data in enumerate(self.test_data_loader):
            inputs, labels = data
            ssm += ((labels.numpy() - ssm_mean) ** 2).sum()
        R2 = 1 - (sse / ssm)
        print(" ", sse, ssm)
        print("R2", R2)

        self.logger.log_custom_reverse_kpi("R2", R2, epoch)

        data_len = len(self.test_data_loader)

        return total_loss / data_len

    def save(self, model_fname):
        # Note the asymmetry: save() prefixes 'models/', load() expects a
        # full path.
        torch.save(self.net.state_dict(), 'models/' + model_fname)

    def load(self, model_fname, output_size):
        self.net = ReverseFCNet(self.cfg, output_size)
        self.net.load_state_dict(torch.load(model_fname))
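A minimal sketch of ReverseFCNet in isolation; the cfg values here are invented, not taken from the repository's simulations.json:

```python
import torch

from models import ReverseFCNet

cfg = {
    'problems': {'input_size': 2},
    'model': {'activation': 'ReLU', 'fc_sizes': [64, 64]},
}
# Builds Linear(2, 64), ReLU, Linear(64, 64), ReLU, Linear(64, 1);
# the final activation is popped, so the output layer is linear.
net = ReverseFCNet(cfg, output_size=1)

x = torch.rand(8, 2)      # a batch of 8 factor pairs
print(net(x).shape)       # torch.Size([8, 1])
```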
(3 more binary files and 2 files that could not be displayed.)