From df3a04f857cb22f10f40a550c7ffa19d54fcf076 Mon Sep 17 00:00:00 2001
From: Giorgio Morales Luna <giorgiomoralesluna@gmail.com>
Date: Sun, 21 Jul 2024 09:18:40 -0600
Subject: [PATCH] Added additional NN architectures

---
 README.md                                  |  3 +-
 setup.py                                   |  2 +-
 src/PredictionIntervals/Trainer/TrainNN.py | 12 +++-
 src/PredictionIntervals/models/NNModel.py  |  9 ++-
 src/PredictionIntervals/models/network.py  | 72 ++++++++++++++++++++++
 5 files changed, 92 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index f07d4dc9..a9210b83 100644
--- a/README.md
+++ b/README.md
@@ -40,7 +40,8 @@ First, create an instance of the class `Trainer`.
 *   `Xval`: Validation input data. 2-D numpy array, shape (#samples, #features)
 *   `Yval`: Validation target data. 1-D numpy array, shape (#samples, #features)
 *   `method`: PI-generation method. Options: 'DualAQD' or '[MCDropout](https://arxiv.org/pdf/1709.01907.pdf)'
-*   `normData`: If True, apply z-score normalization to the inputs and min-max normalization to the outputs
+*   `architecture`: Type of NN model to be used. Options: 'shallow' (default, 2 hidden layers), 'deep' (3 hidden layers), or 'deeper' (5 hidden layers)
+*   `normData`: If True, apply z-score normalization to the inputs and min-max normalization to the outputs
 
 **Note**: Normalization is applied to the training set; then, the exact same scaling is applied to the validation set.
 
diff --git a/setup.py b/setup.py
index 2f4678e6..5deecb3b 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setuptools.setup(
     name='PredictionIntervals',
-    version='0.0.1',
+    version='0.1.0',
     author='Giorgio Morales - Montana State University',
     author_email='giorgiomoralesluna@gmail.com',
     description='DualAQD: Dual Accuracy-quality-driven Prediction Intervals',
diff --git a/src/PredictionIntervals/Trainer/TrainNN.py b/src/PredictionIntervals/Trainer/TrainNN.py
index be3e911f..0697eb86 100644
--- a/src/PredictionIntervals/Trainer/TrainNN.py
+++ b/src/PredictionIntervals/Trainer/TrainNN.py
@@ -1,3 +1,5 @@
+import sys
+
 import torch
 import pickle
 from ..utils import *
@@ -6,7 +8,7 @@
 
 class Trainer:
     def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, method: str = 'DualAQD',
-                 normData: bool = True):
+                 architecture: str = 'shallow', normData: bool = True):
         """
         Train a PI-generation NN using DualAQD
         :param X: Input data (explainable variables). 2-D numpy array, shape (#samples, #features)
@@ -14,6 +16,8 @@ def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, met
         :param Xval: Validation input data. 2-D numpy array, shape (#samples, #features)
         :param Yval: Validation target data. 1-D numpy array, shape (#samples, #features)
         :param method: PI-generation method. Options: 'DualAQD' or 'MCDropout'
+        :param architecture: Type of NN model to be used. Options: ['shallow' (2 hidden layers),
+                             'deep' (3 hidden layers), 'deeper' (5 hidden layers)]
         :param normData: If True, apply z-score normalization to the inputs and min-max normalization to the outputs
         """
         # Class variables
@@ -25,6 +29,10 @@ def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, met
         self.name = 'temp_' + method  # Save the model in a temp folder
         # Configure model
         self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+        if architecture not in ['shallow', 'deep', 'deeper']:
+            sys.exit("For now, the only architecture options available are: ['shallow' (2 hidden layers), "
+                     "'deep' (3 hidden layers), 'deeper' (5 hidden layers)]")
+        self.architecture = architecture
         self.model = self.reset_model()
         self.f = self._set_folder()
 
@@ -34,7 +42,7 @@ def __init__(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array, met
         self.set_data(X, Y, Xval, Yval)
 
     def reset_model(self):
-        return NNModel(device=self.device, nfeatures=self.n_features, method=self.method)
+        return NNModel(device=self.device, nfeatures=self.n_features, method=self.method, architecture=self.architecture)
 
     def set_data(self, X: np.array, Y: np.array, Xval: np.array, Yval: np.array):
         if self.normData:
diff --git a/src/PredictionIntervals/models/NNModel.py b/src/PredictionIntervals/models/NNModel.py
index 823f3a7d..19dcbe7c 100644
--- a/src/PredictionIntervals/models/NNModel.py
+++ b/src/PredictionIntervals/models/NNModel.py
@@ -190,7 +190,7 @@ def __init__(self, model, criterion, optimizer):
 
 class NNModel:
 
-    def __init__(self, device, nfeatures, method):
+    def __init__(self, device, nfeatures, method, architecture):
         self.method = method
         self.device = device
         self.nfeatures = nfeatures
@@ -206,7 +206,12 @@ def __init__(self, device, nfeatures, method):
             self.output_size = 1
 
         criterion = nn.MSELoss()
-        network = NN(input_shape=self.nfeatures, output_size=self.output_size)
+        if architecture == 'shallow':
+            network = NN(input_shape=self.nfeatures, output_size=self.output_size)
+        elif architecture == 'deep':
+            network = NN2(input_shape=self.nfeatures, output_size=self.output_size)
+        else:
+            network = NN3(input_shape=self.nfeatures, output_size=self.output_size)
         network.to(self.device)
         # Training parameters
         optimizer = optim.Adadelta(network.parameters(), lr=0.1)
diff --git a/src/PredictionIntervals/models/network.py b/src/PredictionIntervals/models/network.py
index 07884c58..c477f538 100644
--- a/src/PredictionIntervals/models/network.py
+++ b/src/PredictionIntervals/models/network.py
@@ -23,3 +23,75 @@ def forward(self, x):
         x = self.hidden_layer2(x)
         x = self.drop2(x)
         return self.out(x)
+
+
+class NN2(nn.Module, ABC):
+    """Deep NN: 3 hidden layers (500, 100, 50 units; ReLU), each followed by dropout (p=0.01)"""
+
+    def __init__(self, input_shape: int = 10, output_size: int = 1):
+        """
+        Build the three hidden layers, their dropout layers, and the linear output layer.
+        :param input_shape: Number of input features (in_features of the first linear layer).
+        :param output_size: Number of output units (out_features of the final linear layer).
+        """
+        super(NN2, self).__init__()
+        self.hidden_layer1 = nn.Sequential(
+            nn.Linear(in_features=input_shape, out_features=500), nn.ReLU())
+        self.drop1 = nn.Dropout(p=0.01)
+        self.hidden_layer2 = nn.Sequential(
+            nn.Linear(in_features=500, out_features=100), nn.ReLU())
+        self.drop2 = nn.Dropout(p=0.01)
+        self.hidden_layer3 = nn.Sequential(
+            nn.Linear(in_features=100, out_features=50), nn.ReLU())
+        self.drop3 = nn.Dropout(p=0.01)
+
+        # Number of outputs depends on the method
+        self.out = nn.Linear(50, output_size)
+
+    def forward(self, x):
+        x = self.hidden_layer1(x)
+        x = self.drop1(x)
+        x = self.hidden_layer2(x)
+        x = self.drop2(x)
+        x = self.hidden_layer3(x)
+        x = self.drop3(x)
+        return self.out(x)
+
+
+class NN3(nn.Module, ABC):
+    """Deeper NN: 5 hidden layers (200, 500, 500, 100, 50 units; ReLU); dropout (p=0.01) after the first 3 only"""
+
+    def __init__(self, input_shape: int = 10, output_size: int = 1):
+        """
+        Build the five hidden layers, three dropout layers, and the linear output layer.
+        :param input_shape: Number of input features (in_features of the first linear layer).
+        :param output_size: Number of output units (out_features of the final linear layer).
+        """
+        super(NN3, self).__init__()
+        self.hidden_layer1 = nn.Sequential(
+            nn.Linear(in_features=input_shape, out_features=200), nn.ReLU())
+        self.drop1 = nn.Dropout(p=0.01)
+        self.hidden_layer2 = nn.Sequential(
+            nn.Linear(in_features=200, out_features=500), nn.ReLU())
+        self.drop2 = nn.Dropout(p=0.01)
+        self.hidden_layer3 = nn.Sequential(
+            nn.Linear(in_features=500, out_features=500), nn.ReLU())
+        self.drop3 = nn.Dropout(p=0.01)
+        self.hidden_layer4 = nn.Sequential(  # hidden layers 4 and 5 have no dropout layer
+            nn.Linear(in_features=500, out_features=100), nn.ReLU())
+        self.hidden_layer5 = nn.Sequential(
+            nn.Linear(in_features=100, out_features=50), nn.ReLU())
+
+        # Number of outputs depends on the method
+        self.out = nn.Linear(50, output_size)
+
+    def forward(self, x):
+        x = self.hidden_layer1(x)
+        x = self.drop1(x)
+        x = self.hidden_layer2(x)
+        x = self.drop2(x)
+        x = self.hidden_layer3(x)
+        x = self.drop3(x)
+        x = self.hidden_layer4(x)  # layers 4 and 5 are applied without dropout
+        x = self.hidden_layer5(x)
+        return self.out(x)