From df3a04f857cb22f10f40a550c7ffa19d54fcf076 Mon Sep 17 00:00:00 2001 From: Giorgio Morales Luna Date: Sun, 21 Jul 2024 09:18:40 -0600 Subject: [PATCH] Added additional NN architectures --- README.md | 3 +- setup.py | 2 +- src/PredictionIntervals/Trainer/TrainNN.py | 12 +++- src/PredictionIntervals/models/NNModel.py | 9 ++- src/PredictionIntervals/models/network.py | 72 ++++++++++++++++++++++ 5 files changed, 92 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index f07d4dc9..a9210b83 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,8 @@ First, create an instance of the class `Trainer`. * `Xval`: Validation input data. 2-D numpy array, shape (#samples, #features) * `Yval`: Validation target data. 1-D numpy array, shape (#samples, #features) * `method`: PI-generation method. Options: 'DualAQD' or '[MCDropout](https://arxiv.org/pdf/1709.01907.pdf)' -* `normData`: If True, apply z-score normalization to the inputs and min-max normalization to the outputs +* `architecture`: Type of NN model to be used. Options: ['shallow' (Default, 2 hidden layers), 'deep' (3 hidden layers), 'deeper' (5 hidden layers)] +* `normData`: If True, apply z-score normalization to the inputs and min-max normalization to the outputs **Note**: Normalization is applied to the training set; then, the exact same scaling is applied to the validation set. 
diff --git a/setup.py b/setup.py index 2f4678e6..5deecb3b 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name='PredictionIntervals', - version='0.0.1', + version='0.1.0', author='Giorgio Morales - Montana State University', author_email='giorgiomoralesluna@gmail.com', description='DualAQD: Dual Accuracy-quality-driven Prediction Intervals', diff --git a/src/PredictionIntervals/Trainer/TrainNN.py b/src/PredictionIntervals/Trainer/TrainNN.py index be3e911f..0697eb86 100644 --- a/src/PredictionIntervals/Trainer/TrainNN.py +++ b/src/PredictionIntervals/Trainer/TrainNN.py @@ -1,3 +1,5 @@ +import sys + import torch import pickle from ..utils import * @@ -6,7 +8,7 @@ class Trainer: def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, method: str = 'DualAQD', - normData: bool = True): + architecture: str = 'shallow', normData: bool = True): """ Train a PI-generation NN using DualAQD :param X: Input data (explainable variables). 2-D numpy array, shape (#samples, #features) @@ -14,6 +16,8 @@ def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, met :param Xval: Validation input data. 2-D numpy array, shape (#samples, #features) :param Yval: Validation target data. 1-D numpy array, shape (#samples, #features) :param method: PI-generation method. Options: 'DualAQD' or 'MCDropout' + :param architecture: Type of NN model to be used. 
Options: ['shallow' (default, 2 hidden layers), + 'deep' (3 hidden layers), 'deeper' (5 hidden layers)]. Any other value raises ValueError :param normData: If True, apply z-score normalization to the inputs and min-max normalization to the outputs """ # Class variables @@ -25,6 +29,10 @@ def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, met self.name = 'temp_' + method # Save the model in a temp folder # Configure model self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + if architecture not in ['shallow', 'deep', 'deeper']: + raise ValueError("For now, the only architecture options available are: ['shallow' (2 hidden layers), " + "'deep' (3 hidden layers), 'deeper' (5 hidden layers)]") + self.architecture = architecture self.model = self.reset_model() self.f = self._set_folder() @@ -34,7 +42,7 @@ def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, met self.set_data(X, Y, Xval, Yval) def reset_model(self): - return NNModel(device=self.device, nfeatures=self.n_features, method=self.method) + return NNModel(device=self.device, nfeatures=self.n_features, method=self.method, architecture=self.architecture) def set_data(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array): if self.normData: diff --git a/src/PredictionIntervals/models/NNModel.py b/src/PredictionIntervals/models/NNModel.py index 823f3a7d..19dcbe7c 100644 --- a/src/PredictionIntervals/models/NNModel.py +++ b/src/PredictionIntervals/models/NNModel.py @@ -190,7 +190,7 @@ def __init__(self, model, criterion, optimizer): class NNModel: - def __init__(self, device, nfeatures, method): + def __init__(self, device, nfeatures, method, architecture='shallow'): self.method = method self.device = device self.nfeatures = nfeatures @@ -206,7 +206,12 @@ def __init__(self, device, nfeatures, method): self.output_size = 1 criterion = nn.MSELoss() - network = NN(input_shape=self.nfeatures, output_size=self.output_size) + if architecture == 'shallow': + network = NN(input_shape=self.nfeatures, 
output_size=self.output_size) + elif architecture == 'deep': + network = NN2(input_shape=self.nfeatures, output_size=self.output_size) + else: + network = NN3(input_shape=self.nfeatures, output_size=self.output_size) network.to(self.device) # Training parameters optimizer = optim.Adadelta(network.parameters(), lr=0.1) diff --git a/src/PredictionIntervals/models/network.py b/src/PredictionIntervals/models/network.py index 07884c58..c477f538 100644 --- a/src/PredictionIntervals/models/network.py +++ b/src/PredictionIntervals/models/network.py @@ -23,3 +23,75 @@ def forward(self, x): x = self.hidden_layer2(x) x = self.drop2(x) return self.out(x) + + +class NN2(nn.Module, ABC): + """Defines deep NN architecture (3 hidden ReLU layers of 500, 100, and 50 units, each followed by dropout with p=0.01)""" + + def __init__(self, input_shape: int = 10, output_size: int = 1): + """ + Initialize NN2: build the three hidden layers, their dropout layers, and the linear output layer + :param input_shape: Number of input features (in_features of the first linear layer). + :param output_size: Number of output units (out_features of the final linear layer). + """ + super(NN2, self).__init__() + self.hidden_layer1 = nn.Sequential( + nn.Linear(in_features=input_shape, out_features=500), nn.ReLU()) + self.drop1 = nn.Dropout(p=0.01) + self.hidden_layer2 = nn.Sequential( + nn.Linear(in_features=500, out_features=100), nn.ReLU()) + self.drop2 = nn.Dropout(p=0.01) + self.hidden_layer3 = nn.Sequential( + nn.Linear(in_features=100, out_features=50), nn.ReLU()) + self.drop3 = nn.Dropout(p=0.01) + + # Number of outputs depends on the method + self.out = nn.Linear(50, output_size) + + def forward(self, x): + x = self.hidden_layer1(x) + x = self.drop1(x) + x = self.hidden_layer2(x) + x = self.drop2(x) + x = self.hidden_layer3(x) + x = self.drop3(x) + return self.out(x) + + +class NN3(nn.Module, ABC): + """Defines deeper NN architecture (5 hidden ReLU layers of 200, 500, 500, 100, and 50 units; dropout with p=0.01 follows only the first three. NOTE(review): confirm layers 4-5 are meant to have no dropout)""" + + def __init__(self, input_shape: int = 10, output_size: int = 1): + """ + Initialize NN3: build the five hidden layers, the three dropout layers, and the linear output layer + :param input_shape: Number of input features (in_features of the first linear layer). + :param output_size: Number of output units (out_features of the final linear layer). 
+ """ + super(NN3, self).__init__() + self.hidden_layer1 = nn.Sequential( + nn.Linear(in_features=input_shape, out_features=200), nn.ReLU()) + self.drop1 = nn.Dropout(p=0.01) + self.hidden_layer2 = nn.Sequential( + nn.Linear(in_features=200, out_features=500), nn.ReLU()) + self.drop2 = nn.Dropout(p=0.01) + self.hidden_layer3 = nn.Sequential( + nn.Linear(in_features=500, out_features=500), nn.ReLU()) + self.drop3 = nn.Dropout(p=0.01) + self.hidden_layer4 = nn.Sequential( + nn.Linear(in_features=500, out_features=100), nn.ReLU()) + self.hidden_layer5 = nn.Sequential( + nn.Linear(in_features=100, out_features=50), nn.ReLU()) + + # Number of outputs depends on the method + self.out = nn.Linear(50, output_size) + + def forward(self, x): + x = self.hidden_layer1(x) + x = self.drop1(x) + x = self.hidden_layer2(x) + x = self.drop2(x) + x = self.hidden_layer3(x) + x = self.drop3(x) + x = self.hidden_layer4(x) + x = self.hidden_layer5(x) + return self.out(x)