Commit 7299460: Initial commit.
instance01 committed May 11, 2021 (0 parents)

Showing 31 changed files with 858 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
@@ -0,0 +1,3 @@
__pycache__
*.swp
*.bak
39 changes: 39 additions & 0 deletions config.py
@@ -0,0 +1,39 @@
import copy
import json


class Config:
    def __init__(self, base_path=''):
        with open(base_path + 'simulations.json', 'r') as f:
            self.cfg = json.load(f)

    def _update_cfg(self, base_cfg, new_cfg):
        # We support one level of nesting for now.
        for k in new_cfg.keys():
            if k in ('base_cfg', 'desc', 'dataset_id'):
                continue
            base_cfg[k].update(new_cfg[k])
        if 'dataset_id' in new_cfg:
            base_cfg['dataset_id'] = new_cfg['dataset_id']

    def get_cfg(self, cfg_id):
        if cfg_id not in self.cfg:
            raise KeyError(
                'Key %s does not exist in simulations.json.' % cfg_id
            )

        # Deep-copy so repeated get_cfg() calls do not mutate config "1".
        initial_base_cfg = copy.deepcopy(self.cfg["1"])
        base_cfg = self.cfg[self.cfg[cfg_id].get('base_cfg', cfg_id)]
        # All base configs are based on config "1".
        # This enables backwards compatibility when new options are added.
        self._update_cfg(initial_base_cfg, base_cfg)

        cfg = copy.deepcopy(initial_base_cfg)
        self._update_cfg(cfg, self.cfg[cfg_id])

        cfg['cfg_id'] = cfg_id
        return cfg


if __name__ == '__main__':
    cfg = Config()
    print('\n'.join(cfg.cfg.keys()))
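
Not part of the commit: a minimal sketch of how simulations.json and the layering in get_cfg fit together. Only the inheritance mechanics (base_cfg, config "1" as the root) come from config.py itself; every key and value below is an assumption inferred from how main.py and models.py read the config.

# Hypothetical simulations.json contents and usage sketch.
import json

simulations = {
    "1": {  # root config that every other entry inherits from
        "desc": "base",
        "dataset_id": "1",
        "model": {"batch_size": 32, "lr": 0.001, "n_epochs": 10,
                  "train_eval_split": 0.8, "shuffle_data": True,
                  "optimizer": "Adam", "optimizer_sgd_momentum": 0.9,
                  "activation": "ReLU", "fc_sizes": [64, 64]},
        "problems": {"input_size": 2, "high2": 2000,
                     "n_problems": 2000, "log_norm": False},
    },
    "7_high2_50": {  # overrides only what differs from its base
        "base_cfg": "1",
        "desc": "small upper bound",
        "dataset_id": "7_high2_50",
        "problems": {"high2": 50},
    },
}

with open("simulations.json", "w") as f:
    json.dump(simulations, f, indent=2)

from config import Config
cfg = Config().get_cfg("7_high2_50")
print(cfg["problems"]["high2"], cfg["model"]["lr"])  # 50 0.001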
Binary file added datasets/1data.npy
Binary file added datasets/2data.npy
Binary file added datasets/3data.npy
Binary file added datasets/4data.npy
Binary file added datasets/5data.npy
Binary file added datasets/7_high2_2000_lndata.npy
Binary file added datasets/7_high2_2000data.npy
Binary file added datasets/7_high2_200data.npy
Binary file added datasets/7_high2_500data.npy
Binary file added datasets/7_high2_50data.npy
59 changes: 59 additions & 0 deletions logger.py
@@ -0,0 +1,59 @@
import json

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.colors as colors

from torch.utils.tensorboard import SummaryWriter


# _rebuild() is a private matplotlib API and was removed in later
# releases; guard the call so the module still imports without it.
# Assumes the Raleway font is installed.
try:
    mpl.font_manager._rebuild()
except AttributeError:
    pass
plt.rc('font', family='Raleway')


def truncate_colormap(cmapIn='jet', minval=0.0, maxval=1.0, n=100):
    """Return a copy of cmapIn truncated to the [minval, maxval] range."""
    cmapIn = plt.get_cmap(cmapIn)

    new_cmap = colors.LinearSegmentedColormap.from_list(
        'trunc({n},{a:.2f},{b:.2f})'.format(n=cmapIn.name, a=minval, b=maxval),
        cmapIn(np.linspace(minval, maxval, n)))

    return new_cmap


class Logger:
    def __init__(self, model_fname, cfg):
        self.cfg = cfg
        self.model_fname = model_fname
        self.writer = SummaryWriter(log_dir='runs/' + model_fname)

    def log_config(self):
        self.writer.add_text('Info/Config', json.dumps(self.cfg), 0)

    def log_train(self, data, n_iter):
        self.writer.add_scalar('Loss/Train', data['loss_train'], n_iter)

    def log_eval(self, data, n_iter):
        self.writer.add_scalar('Loss/Eval', data['loss_eval'], n_iter)
        for k, v in data['Problem_Misclassifications'].items():
            self.writer.add_scalar(
                'Problem_Misclassifications/' + k,
                v,
                n_iter
            )
        self.writer.add_scalar(
            'Total_Misclassifications',
            data['Total_Misclassifications'],
            n_iter
        )

    def log_eval_reverse(self, data, n_iter):
        self.writer.add_scalar('Loss/Eval', data['loss_eval'], n_iter)

    def log_custom_reverse_kpi(self, kpi, data, n_iter):
        self.writer.add_scalar('Custom/' + kpi, data, n_iter)

    def close(self):
        self.writer.close()
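
Not part of the commit: a minimal usage sketch for Logger, assuming TensorBoard is installed. The run name and all logged values below are hypothetical; the tags and the runs/ directory mirror the class above.

from logger import Logger

cfg = {"cfg_id": "7_high2_50", "model": {"lr": 0.001}}  # any JSON-serializable dict
logger = Logger("demo-run", cfg)
logger.log_config()
for epoch in range(3):
    logger.log_train({"loss_train": 1.0 / (epoch + 1)}, epoch)
    logger.log_eval_reverse({"loss_eval": 1.2 / (epoch + 1)}, epoch)
    logger.log_custom_reverse_kpi("R2", 0.5 + 0.1 * epoch, epoch)
logger.close()
# Inspect the scalars with: tensorboard --logdir runs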
117 changes: 117 additions & 0 deletions main.py
@@ -0,0 +1,117 @@
import pickle
import random
import datetime
import argparse

import torch
import numpy as np
from torch.utils.data import TensorDataset, DataLoader

from config import Config
from logger import Logger
from models import RegressionOptimizer


def get_model_fname(cfg):
    rand_str = str(int(random.random() * 9e6))
    model_fname = "-".join([
        datetime.datetime.now().strftime("%y-%m-%d_%H:%M:%S"),
        rand_str,
        cfg['cfg_id']
    ])
    return model_fname


def setup_data_loader(cfg, data):
    X = data[0]
    y = data[1]

    tensor_x = torch.Tensor(X)
    tensor_y = torch.Tensor(y)

    dataset = TensorDataset(tensor_x, tensor_y)

    batch_size = cfg['model']['batch_size']
    train_eval_split = cfg['model']['train_eval_split']
    train_size = int(train_eval_split * len(dataset))
    test_size = len(dataset) - train_size
    train_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, test_size]
    )
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=cfg['model']['shuffle_data'],
        num_workers=0
    )
    test_data_loader = DataLoader(
        test_dataset,
        batch_size=1,
        num_workers=0
    )

    return train_data_loader, test_data_loader


def run_experiment(cfg_id, n_runs=1):
    cfg = Config().get_cfg(cfg_id)
    # Despite the .npy suffix, the dataset files are pickled (X, y)
    # tuples (see gen_data below), so they are read back with pickle.
    with open("datasets/" + cfg['dataset_id'] + "data.npy", "rb") as f:
        data = pickle.load(f)

    train_data_loader, test_data_loader = setup_data_loader(cfg, data)

    for _ in range(n_runs):
        model_fname = get_model_fname(cfg)
        logger = Logger(model_fname, cfg)
        logger.log_config()
        optimizer = RegressionOptimizer(
            cfg, train_data_loader, test_data_loader, logger
        )
        optimizer.train()
        logger.close()


def gen_data(cfg_id):
    cfg = Config().get_cfg(cfg_id)
    high2 = cfg['problems']['high2']
    n = cfg['problems']['n_problems']

    # Sample integer factor pairs; the target is their product.
    X = np.random.randint(1, high2, size=(n, 2))
    X = np.unique(X, axis=0)
    y = X[:, 0] * X[:, 1]

    # TODO: Do this better..
    X = X.astype(float)
    y = y.astype(float)

    if cfg['problems']['log_norm']:
        X = np.log(X)
        y = np.log(y)

        max_val = max(X.max(), y.max())
        print(max_val)
        X /= max_val
        y /= max_val
    else:
        X /= high2
        y /= high2 * 10.

    print(X)
    print(y)

    with open("datasets/" + cfg['cfg_id'] + "data.npy", "wb") as f:
        pickle.dump((X, y), f)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument("--gendata", action="store_true")
    parser.add_argument("--train", action="store_true")
    parser.add_argument("-c", "--cfg_id", help="cfg_id")
    parser.add_argument("-n", "--nruns", nargs="?", type=int, default=1)
    args = parser.parse_args()

    if args.gendata:
        gen_data(args.cfg_id)
    elif args.train:
        run_experiment(args.cfg_id, args.nruns)
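
Putting the flags above together, a typical workflow is to generate the dataset for a config entry and then train on it; gen_data writes datasets/<cfg_id>data.npy, which run_experiment reads back via the config's dataset_id:

python main.py --gendata -c 7_high2_50
python main.py --train -c 7_high2_50 -n 3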
160 changes: 160 additions & 0 deletions models.py
@@ -0,0 +1,160 @@
import sys

import torch
import torch.nn as nn
import torch.optim as optim


class ReverseFCNet(nn.Module):
    def __init__(self, cfg, output_size):
        super(ReverseFCNet, self).__init__()
        input_size = cfg['problems']['input_size']

        activation_type = cfg['model']['activation']
        if activation_type == "ReLU":
            activation_cls = nn.ReLU
        elif activation_type == "ELU":
            activation_cls = nn.ELU
        elif activation_type == "LeakyReLU":
            activation_cls = nn.LeakyReLU
        else:
            raise ValueError('Unknown activation: %s' % activation_type)

        fc_sizes = cfg['model']['fc_sizes'] + [output_size]

        net = []
        last_fc_size = input_size
        for size in fc_sizes:
            net.append(nn.Linear(last_fc_size, size))
            net.append(activation_cls())
            last_fc_size = size

        # net[0].weight.data.fill_(1.0)
        # net[0].bias.data.fill_(0.0)

        # Drop the trailing activation so the output layer stays linear.
        net.pop(-1)
        self.fc_net = nn.Sequential(*net)
        print(self.fc_net)

    def forward(self, x):
        # x = torch.flatten(x, 1)
        return self.fc_net(x)


class RegressionOptimizer:
    def __init__(self, cfg, train_data_loader, test_data_loader, logger):
        self.cfg = cfg
        self.train_data_loader = train_data_loader
        self.test_data_loader = test_data_loader
        self.logger = logger

        # Load cfg variables.
        lr = cfg['model']['lr']
        sgd_momentum = cfg['model']['optimizer_sgd_momentum']
        self.batch_size = cfg['model']['batch_size']
        self.n_epochs = cfg['model']['n_epochs']
        self.train_eval_split = cfg['model']['train_eval_split']

        # Set it all up.
        # TODO: The output size of 1 is hardcoded.
        self.net = ReverseFCNet(cfg, 1)
        self.criterion = nn.MSELoss()
        if cfg['model']['optimizer'] == 'sgd':
            self.optimizer = optim.SGD(
                self.net.parameters(), lr=lr, momentum=sgd_momentum
            )
        elif cfg['model']['optimizer'] == 'Adam':
            self.optimizer = optim.Adam(
                self.net.parameters(), lr=lr
            )

    def train(self):
        self.net.train()
        data_len = len(self.train_data_loader)
        for epoch in range(self.n_epochs):
            batch_loss = 0.
            for i, data in enumerate(self.train_data_loader):
                inputs, labels = data

                self.optimizer.zero_grad()

                outputs = self.net(inputs.float())

                loss = self.criterion(outputs.T, labels.float())
                loss.backward()
                self.optimizer.step()

                # TODO: If batch_size does not divide the dataset evenly,
                # the last batch is smaller and this term overcounts it.
                batch_loss += loss.item() * self.batch_size
                if i % 1000 == 0:
                    avg_loss = batch_loss / (i + 1)
                    msg = '[%d, %5d] loss: %.3f' % (epoch + 1, i, avg_loss)
                    sys.stdout.write('\r' + msg)
                    sys.stdout.flush()

                # if i % 1000 == 0:
                #     for param in self.net.parameters():
                #         print(param.data)
                #         # print(param.shape)
                #     print('')

                if i % 1000 == 0:
                    data = {
                        "loss_train": batch_loss / (i + 1)
                    }
                    self.logger.log_train(data, data_len * epoch + i)

            self.net.eval()
            data = {}
            test_loss = self.eval(epoch, do_print=False, debug=epoch % 10 == 0)
            data['loss_eval'] = test_loss
            self.logger.log_eval_reverse(data, epoch)
            self.net.train()
            print('')

    def eval(self, epoch, do_print=True, debug=False):
        sse = 0
        ssm_mean = None
        n = 0

        self.net.eval()
        total_loss = 0.0
        for i, data in enumerate(self.test_data_loader):
            inputs, labels = data
            outputs = self.net(inputs.float())

            # squeeze(1) aligns the (1, 1) outputs with the (1,) labels;
            # the test loader uses batch_size=1.
            loss = self.criterion(outputs.squeeze(1), labels)

            sse += ((labels.numpy() - outputs[0].detach().numpy()) ** 2).sum()
            if ssm_mean is None:
                ssm_mean = labels.numpy()
            else:
                ssm_mean += labels.numpy()
            n += 1

            total_loss += loss.item()
            if do_print and i % 1000 == 0:
                msg = '[%d] loss: %.3f' % (i, total_loss / (i + 1))
                sys.stdout.write('\r' + msg)
                sys.stdout.flush()

        # R^2 = 1 - SSE / SSM, where SSE is the sum of squared residuals
        # and SSM the total sum of squares around the mean label.
        ssm_mean /= n
        ssm = 0
        for i, data in enumerate(self.test_data_loader):
            inputs, labels = data
            ssm += ((labels.numpy() - ssm_mean) ** 2).sum()
        R2 = 1 - (sse / ssm)
        print(" ", sse, ssm)
        print("R2", R2)

        self.logger.log_custom_reverse_kpi("R2", R2, epoch)

        data_len = len(self.test_data_loader)

        return total_loss / data_len

    def save(self, model_fname):
        torch.save(self.net.state_dict(), 'models/' + model_fname)

    def load(self, model_fname, output_size):
        self.net = ReverseFCNet(self.cfg, output_size)
        self.net.load_state_dict(torch.load(model_fname))
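
Not part of the commit: a small sketch that makes the fc_sizes expansion and the net.pop(-1) (linear output layer) concrete. The config values here are hypothetical; only the keys mirror what ReverseFCNet reads.

from models import ReverseFCNet

cfg = {
    'problems': {'input_size': 2},
    'model': {'activation': 'ReLU', 'fc_sizes': [8, 4]},
}
net = ReverseFCNet(cfg, output_size=1)
# The constructor prints the resulting stack, roughly:
# Sequential(
#   (0): Linear(in_features=2, out_features=8, bias=True)
#   (1): ReLU()
#   (2): Linear(in_features=8, out_features=4, bias=True)
#   (3): ReLU()
#   (4): Linear(in_features=4, out_features=1, bias=True)
# )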
Binary file added plots/comp.pdf
Binary file added plots/comp.pickle
Binary file added plots/comp.png
Binary file added plots/comp2.pdf
Binary file added plots/comp2.png
