Commit
Added LSTM solver, and parameters for simulated dataset
Jad-yehya committed Jul 8, 2024
1 parent c990a1e commit be01a83
Showing 4 changed files with 192 additions and 6 deletions.
10 changes: 6 additions & 4 deletions datasets/simulated.py
@@ -11,10 +11,12 @@ class Dataset(BaseDataset):
install_cmd = "conda"
requirements = ["scikit-learn"]

-    n_samples = 100
-    n_features = 5
-    noise = 0.1
-    n_anomaly = 90
+    parameters = {
+        "n_samples": [100],
+        "n_features": [5],
+        "noise": [0.1],
+        "n_anomaly": [90],
+    }

def get_data(self):
X_train, _ = make_regression(
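For context: in benchopt, each key of a "parameters" dict maps to a list of values, and the benchmark runs the cross-product of all the lists, so moving these constants into "parameters" makes them sweepable. A minimal sketch of the expansion, with hypothetical values not part of this commit:

parameters = {
    "n_samples": [100, 1000],
    "noise": [0.1, 0.5],
}
# benchopt instantiates the Dataset once per combination:
#   (n_samples=100, noise=0.1), (100, 0.5), (1000, 0.1), (1000, 0.5)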
4 changes: 2 additions & 2 deletions objective.py
@@ -38,11 +38,11 @@ def evaluate_result(self, y_hat):
zoloss = zero_one_loss(self.y_test, y_hat)

        return {
-            "value": zoloss,  # having zoloss twice because of the API
-            "zoloss": zoloss,
            "precision": precision,
            "recall": recall,
            "f1": f1,
+            "zoloss": zoloss,
+            "value": zoloss,  # having zoloss twice because of the API
        }

def get_objective(self):
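For reference, zero_one_loss matches scikit-learn's metric; precision, recall and f1 are presumably computed the same way earlier in evaluate_result (not shown in this hunk). A minimal sketch on toy binary labels, illustrative only:

from sklearn.metrics import (
    zero_one_loss, precision_score, recall_score, f1_score
)

y_true = [0, 0, 1, 1, 0]
y_pred = [0, 1, 1, 0, 0]
print(zero_one_loss(y_true, y_pred))    # 0.4, the fraction of mislabeled samples
print(precision_score(y_true, y_pred))  # 0.5
print(recall_score(y_true, y_pred))     # 0.5
print(f1_score(y_true, y_pred))         # 0.5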
180 changes: 180 additions & 0 deletions solvers/lstm.py
@@ -0,0 +1,180 @@
# LSTM Autoencoder
from benchopt import BaseSolver, safe_import_context

with safe_import_context() as import_ctx:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import DataLoader
from tqdm import tqdm


class LSTM_Autoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64,
                 enc_layers=1, dec_layers=1):
super(LSTM_Autoencoder, self).__init__()
self.seq_len, self.n_features = seq_len, n_features
self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim

self.encoder = nn.LSTM(
input_size=n_features,
hidden_size=self.hidden_dim,
num_layers=enc_layers,
batch_first=True
)

self.decoder = nn.LSTM(
input_size=self.hidden_dim,
hidden_size=n_features,
num_layers=dec_layers,
batch_first=True
)

def forward(self, x):

x, (_, _) = self.encoder(x)
x, (_, _) = self.decoder(x)

return x
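        # Shape sketch (batch_first=True):
        #   input x:       (batch, seq_len, n_features)
        #   after encoder: (batch, seq_len, 2 * embedding_dim)
        #   after decoder: (batch, seq_len, n_features), the reconstruction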


class Solver(BaseSolver):
name = "LSTM"

install_cmd = "pip"
requirements = ["torch", "tqdm"]

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

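    # benchopt's "run_once" strategy: the solver is run a single time,
    # with no convergence-curve sampling.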
sampling_strategy = "run_once"

parameters = {
"embedding_dim": [64],
"batch_size": [32],
"n_epochs": [50],
"lr": [1e-5],
"window": [True],
"window_size": [128], # window_size = seq_len
"stride": [1],
"percentile": [97, 98, 99, 99.9],
"encoder_layers": [32],
"decoder_layers": [32],
}

    def prepare_data(self, *data):
        # Return a tuple of float32 tensors on the solver's device.
        return tuple(
            torch.tensor(d, dtype=torch.float32, device=self.device)
            for d in data
        )

def set_objective(self, X_train, y_test, X_test):
self.X_train = X_train
self.X_test, self.y_test = X_test, y_test
self.n_features = X_train.shape[1]
self.seq_len = self.window_size

print("Simulated data shape: ", X_train.shape, X_test.shape)

self.model = LSTM_Autoencoder(
self.seq_len,
self.n_features,
self.embedding_dim,
self.encoder_layers,
self.decoder_layers,
)
self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
self.criterion = nn.MSELoss()

if self.window:
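            # Build overlapping windows along the time axis.
            # sliding_window_view returns (n_windows, n_features, window_size);
            # transposing gives (n_windows, window_size, n_features), the
            # (batch, seq, feature) layout expected with batch_first=True.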
if self.X_train is not None:
self.Xw_train = np.lib.stride_tricks.sliding_window_view(
self.X_train, window_shape=self.window_size, axis=0
)[::self.stride].transpose(0, 2, 1)

self.Xw_train = torch.tensor(
self.Xw_train, dtype=torch.float32
)

if self.X_test is not None:
self.Xw_test = np.lib.stride_tricks.sliding_window_view(
self.X_test, window_shape=self.window_size, axis=0
)[::self.stride].transpose(0, 2, 1)

self.Xw_test = torch.tensor(
self.Xw_test, dtype=torch.float32
)

if self.y_test is not None:
self.yw_test = np.lib.stride_tricks.sliding_window_view(
self.y_test, window_shape=self.window_size, axis=0
)[::self.stride]

self.yw_test = torch.tensor(
self.yw_test, dtype=torch.float32
)

self.train_loader = DataLoader(
self.Xw_train, batch_size=self.batch_size, shuffle=True,
)
self.test_loader = DataLoader(
self.Xw_test, batch_size=self.batch_size, shuffle=False,
)

def run(self, _):

self.model.to(self.device)
self.criterion.to(self.device)

ti = tqdm(range(self.n_epochs), desc="epoch", leave=True)

for epoch in ti:
self.model.train()
train_loss = 0
for i, x in enumerate(self.train_loader):

x = x.to(self.device)

self.optimizer.zero_grad()
x_hat = self.model(x)

loss = self.criterion(x, x_hat)
loss.backward()
self.optimizer.step()
train_loss += loss.item()
train_loss /= len(self.train_loader)

ti.set_postfix(train_loss=f"{train_loss:.5f}")

        self.model.eval()
        raw_reconstruction = []
        with torch.no_grad():
            for x in self.test_loader:
                x = x.to(self.device)
                x_hat = self.model(x)
                raw_reconstruction.append(x_hat.cpu().numpy())

        raw_reconstruction = np.concatenate(raw_reconstruction, axis=0)

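        # Stitch the overlapping windows back into one series (stride=1):
        # the full first window, then the last timestep of each subsequent
        # window, yields one reconstructed value per test sample.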
reconstructed_data = np.concatenate(
[raw_reconstruction[0], raw_reconstruction[1:, -1, :]], axis=0
)

reconstruction_err = np.mean(
np.abs(self.X_test - reconstructed_data), axis=1
)

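        # Flag samples whose reconstruction error exceeds the chosen
        # percentile as anomalies (1); the rest are normal (0).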
self.y_hat = np.where(
reconstruction_err > np.percentile(
reconstruction_err, self.percentile), 1, 0
)

def skip(self, X_train, X_test, y_test):
if self.device != torch.device("cuda"):
return True, "CUDA is not available. Skipping this solver."
return False, None

def get_result(self):
return dict(y_hat=self.y_hat)
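For reference, a sketch of how the new solver would be exercised from benchopt's CLI, assuming it is run from the benchmark root and that "simulated" is the dataset's registered name (flags may vary with the benchopt version):

benchopt run . -s LSTM -d simulated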
4 changes: 4 additions & 0 deletions test_config.py
@@ -15,3 +15,7 @@ def check_test_solver_install(solver_class):
if solver_class.name.lower() == "dif":
if get_cuda_version() is None:
pytest.xfail("Deep IsolationForest needs a working GPU hardware.")

+    if solver_class.name.lower() == "lstm":
+        if get_cuda_version() is None:
+            pytest.xfail("LSTM needs working GPU hardware.")
