diff --git a/steps/19_base_dl_class/Solution_1/basedeepclass.py b/steps/19_base_dl_class/Solution_1/basedeepclass.py
new file mode 100644
index 0000000..e3f444c
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_1/basedeepclass.py
@@ -0,0 +1,51 @@
+import numpy as np
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+from sklearn.utils import check_random_state
+
+from sktime.classification.base import BaseClassifier
+
+
+class BaseDeepClassifier(BaseClassifier):
+
+    def __init__(self, batch_size=40, random_state=None):
+        super(BaseDeepClassifier, self).__init__()
+
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self.model_ = None
+
+    def summary(self):
+        return self.history.history
+
+    def _predict(self, X, **kwargs):
+        probs = self._predict_proba(X, **kwargs)
+        rng = check_random_state(self.random_state)
+        return np.array(
+            [
+                self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
+                for prob in probs
+            ]
+        )
+
+    def _predict_proba(self, X, **kwargs):
+        # Transpose to work correctly with keras
+        X = X.transpose((0, 2, 1))
+        probs = self.model_.predict(X, self.batch_size, **kwargs)
+
+        # check if binary classification
+        if probs.shape[1] == 1:
+            # first column is probability of class 0 and second is of class 1
+            probs = np.hstack([1 - probs, probs])
+        probs = probs / probs.sum(axis=1, keepdims=1)
+        return probs
+
+    def convert_y_to_keras(self, y):
+        self.label_encoder = LabelEncoder()
+        y = self.label_encoder.fit_transform(y)
+        self.classes_ = self.label_encoder.classes_
+        self.n_classes_ = len(self.classes_)
+        y = y.reshape(len(y), 1)
+        self.onehot_encoder = OneHotEncoder(sparse=False, categories="auto")
+        # categories='auto' to get rid of FutureWarning
+        y = self.onehot_encoder.fit_transform(y)
+        return y
diff --git a/steps/19_base_dl_class/Solution_1/basedeepnetwork.py b/steps/19_base_dl_class/Solution_1/basedeepnetwork.py
new file mode 100644
index 0000000..ae111cb
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_1/basedeepnetwork.py
@@ -0,0 +1,38 @@
+from abc import ABC, abstractmethod
+
+from sktime.base import BaseObject
+
+
+class BaseDeepNetwork(BaseObject, ABC):
+
+    @abstractmethod
+    def build_network(self, input_shape, **kwargs):
+        ...
+
+    def build_model(self, input_shape, n_classes, **kwargs):
+        # attributes such as self.random_state, self.metrics, self.loss,
+        # self.activation, self.use_bias and self.optimizer are expected to
+        # be set by the estimator subclass that mixes in this network
+        import tensorflow as tf
+        from tensorflow import keras
+
+        tf.random.set_seed(self.random_state)
+
+        if self.metrics is None:
+            metrics = ["accuracy"]
+        else:
+            metrics = self.metrics
+        input_layer, output_layer = self.build_network(input_shape, **kwargs)
+
+        output_layer = keras.layers.Dense(
+            units=n_classes, activation=self.activation, use_bias=self.use_bias
+        )(output_layer)
+
+        self.optimizer_ = (
+            keras.optimizers.Adam(learning_rate=0.01)
+            if self.optimizer is None
+            else self.optimizer
+        )
+
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+        model.compile(
+            loss=self.loss,
+            optimizer=self.optimizer_,
+            metrics=metrics,
+        )
+        return model
diff --git a/steps/19_base_dl_class/Solution_1/basedeepreg.py b/steps/19_base_dl_class/Solution_1/basedeepreg.py
new file mode 100644
index 0000000..3096027
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_1/basedeepreg.py
@@ -0,0 +1,30 @@
+from abc import ABC
+
+import numpy as np
+
+from sktime.regression.base import BaseRegressor
+
+
+class BaseDeepRegressor(BaseRegressor, ABC):
+
+    def __init__(self, batch_size=40):
+        super(BaseDeepRegressor, self).__init__()
+
+        self.batch_size = batch_size
+        self.model_ = None
+
+    def _predict(self, X, **kwargs):
+        """
+        Find regression estimate for all cases in X.
+
+        Parameters
+        ----------
+        X : an np.ndarray of shape = (n_instances, n_dimensions, series_length)
+            The training input samples.
+
+        Returns
+        -------
+        predictions : 1d numpy array
+            array of predictions of each instance
+        """
+        X = X.transpose((0, 2, 1))
+        y_pred = self.model_.predict(X, self.batch_size, **kwargs)
+        y_pred = np.squeeze(y_pred, axis=-1)
+        return y_pred
diff --git a/steps/19_base_dl_class/Solution_1/cnnclass.py b/steps/19_base_dl_class/Solution_1/cnnclass.py
new file mode 100644
index 0000000..ab1906f
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_1/cnnclass.py
@@ -0,0 +1,75 @@
+
+from sklearn.utils import check_random_state
+
+from cnnnetwork import CNNNetwork
+from basedeepclass import BaseDeepClassifier
+from sktime.utils.validation._dependencies import _check_dl_dependencies
+
+_check_dl_dependencies(severity="warning")
+
+
+class CNNClassifier(BaseDeepClassifier, CNNNetwork):
+
+    def __init__(
+        self,
+        n_epochs=2000,
+        batch_size=16,
+        kernel_size=7,
+        avg_pool_size=3,
+        n_conv_layers=2,
+        callbacks=None,
+        verbose=False,
+        loss="mean_squared_error",
+        metrics=None,
+        random_state=None,
+        activation="sigmoid",
+        use_bias=True,
+        optimizer=None,
+    ):
+        _check_dl_dependencies(severity="error")
+        super(CNNClassifier, self).__init__()
+        self.n_conv_layers = n_conv_layers
+        self.avg_pool_size = avg_pool_size
+        self.kernel_size = kernel_size
+        self.callbacks = callbacks
+        self.n_epochs = n_epochs
+        self.batch_size = batch_size
+        self.verbose = verbose
+        self.loss = loss
+        self.metrics = metrics
+        self.random_state = random_state
+        self.activation = activation
+        self.use_bias = use_bias
+        self.optimizer = optimizer
+        self.history = None
+
+    def _fit(self, X, y):
+        if self.callbacks is None:
+            self._callbacks = []
+        else:
+            self._callbacks = self.callbacks
+
+        y_onehot = self.convert_y_to_keras(y)
+        # Transpose to conform to Keras input style.
+        X = X.transpose(0, 2, 1)
+
+        check_random_state(self.random_state)
+        self.input_shape = X.shape[1:]
+        self.model_ = self.build_model(self.input_shape, self.n_classes_)
+        if self.verbose:
+            self.model_.summary()
+        self.history = self.model_.fit(
+            X,
+            y_onehot,
+            batch_size=self.batch_size,
+            epochs=self.n_epochs,
+            verbose=self.verbose,
+            callbacks=self._callbacks,
+        )
+        return self
+
+
+if __name__ == "__main__":
+    cnn = CNNClassifier()
+    from sktime.datasets import load_unit_test
+
+    X_train, y_train = load_unit_test(split='train', return_X_y=True)
+    cnn.fit(X_train, y_train)
+    X_test, y_test = load_unit_test(split='test', return_X_y=True)
+    print(cnn.predict(X_test))
+    print(y_test)
diff --git a/steps/19_base_dl_class/Solution_1/cnnnetwork.py b/steps/19_base_dl_class/Solution_1/cnnnetwork.py
new file mode 100644
index 0000000..3be030a
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_1/cnnnetwork.py
@@ -0,0 +1,60 @@
+from basedeepnetwork import BaseDeepNetwork
+from sktime.utils.validation._dependencies import _check_dl_dependencies
+
+_check_dl_dependencies(severity="warning")
+
+
+class CNNNetwork(BaseDeepNetwork):
+    def __init__(
+        self,
+        kernel_size=7,
+        avg_pool_size=3,
+        n_conv_layers=2,
+        activation="sigmoid",
+        random_state=0,
+    ):
+        _check_dl_dependencies(severity="error")
+        self.random_state = random_state
+        self.kernel_size = kernel_size
+        self.avg_pool_size = avg_pool_size
+        self.n_conv_layers = n_conv_layers
+        self.filter_sizes = [6, 12]
+        self.activation = activation
+
+    def build_network(self, input_shape, **kwargs):
+        from tensorflow import keras
+
+        # default to "valid" (no) padding; for short series (fewer than 60
+        # time points) switch to "same" padding, so the stacked conv/pool
+        # layers do not shrink the feature map to zero length
+        padding = "valid"
+        input_layer = keras.layers.Input(input_shape)
+        if input_shape[0] < 60:
+            padding = "same"
+
+        # pad or truncate self.filter_sizes so there is exactly one filter
+        # size per convolution layer
+        if len(self.filter_sizes) > self.n_conv_layers:
+            self.filter_sizes = self.filter_sizes[: self.n_conv_layers]
+        elif len(self.filter_sizes) < self.n_conv_layers:
+            self.filter_sizes = self.filter_sizes + [self.filter_sizes[-1]] * (
+                self.n_conv_layers - len(self.filter_sizes)
+            )
+        conv = keras.layers.Conv1D(
+            filters=self.filter_sizes[0],
+            kernel_size=self.kernel_size,
+            padding=padding,
+            activation=self.activation,
+        )(input_layer)
+        conv = keras.layers.AveragePooling1D(pool_size=self.avg_pool_size)(conv)
+
+        for i in range(1, self.n_conv_layers):
+            conv = keras.layers.Conv1D(
+                filters=self.filter_sizes[i],
+                kernel_size=self.kernel_size,
+                padding=padding,
+                activation=self.activation,
+            )(conv)
+            conv = keras.layers.AveragePooling1D(pool_size=self.avg_pool_size)(conv)
+
+        flatten_layer = keras.layers.Flatten()(conv)
+
+        return input_layer, flatten_layer
diff --git a/steps/19_base_dl_class/Solution_1/cnnreg.py b/steps/19_base_dl_class/Solution_1/cnnreg.py
new file mode 100644
index 0000000..35a9dc6
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_1/cnnreg.py
@@ -0,0 +1,72 @@
+from cnnnetwork import CNNNetwork
+from basedeepreg import BaseDeepRegressor
+from sktime.utils.validation._dependencies import _check_dl_dependencies
+
+_check_dl_dependencies(severity="warning")
+
+
+class CNNRegressor(BaseDeepRegressor, CNNNetwork):
+
+    def __init__(
+        self,
+        n_epochs=2000,
+        batch_size=16,
+        kernel_size=7,
+        avg_pool_size=3,
+        n_conv_layers=2,
+        callbacks=None,
+        verbose=False,
+        loss="mean_squared_error",
+        metrics=None,
+        random_state=0,
+        activation="linear",
+        use_bias=True,
+        optimizer=None,
+    ):
+        _check_dl_dependencies(severity="error")
+        super(CNNRegressor, self).__init__(
+            batch_size=batch_size,
+        )
+        self.n_conv_layers = n_conv_layers
+        self.avg_pool_size = avg_pool_size
+        self.kernel_size = kernel_size
+        self.callbacks = callbacks
+        self.n_epochs = n_epochs
+        self.batch_size = batch_size
+        self.verbose = verbose
+        self.loss = loss
+        self.metrics = metrics
+        # attributes below are consumed by the shared build_model in
+        # BaseDeepNetwork, so they must be set here as well
+        self.random_state = random_state
+        self.activation = activation
+        self.use_bias = use_bias
+        self.optimizer = optimizer
+
+    def _fit(self, X, y):
+        """Fit the regressor on the training set (X, y).
+
+        Parameters
+        ----------
+        X : np.ndarray of shape = (n_instances (n), n_dimensions (d), series_length (m))
+            The training input samples.
+        y : np.ndarray of shape n
+            The training data target values.
+
+        Returns
+        -------
+        self : object
+        """
+        if self.callbacks is None:
+            self._callbacks = []
+        else:
+            self._callbacks = self.callbacks
+
+        # Transpose to conform to Keras input style.
+        X = X.transpose(0, 2, 1)
+
+        self.input_shape = X.shape[1:]
+        # a regressor has a single output unit, i.e. n_classes=1
+        self.model_ = self.build_model(self.input_shape, n_classes=1)
+        if self.verbose:
+            self.model_.summary()
+
+        self.history = self.model_.fit(
+            X,
+            y,
+            batch_size=self.batch_size,
+            epochs=self.n_epochs,
+            verbose=self.verbose,
+            callbacks=self._callbacks,
+        )
+        return self
diff --git a/steps/19_base_dl_class/Solution_2/basedeepclass.py b/steps/19_base_dl_class/Solution_2/basedeepclass.py
new file mode 100644
index 0000000..8868aba
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_2/basedeepclass.py
@@ -0,0 +1,103 @@
+# -*- coding: utf-8 -*-
+"""
+Abstract base class for the Keras neural network classifiers.
+
+This class sits between BaseClassifier and the deep_learning classifiers so
+that tags, _predict and _predict_proba can be generalised in one place.
+"""
+__author__ = ["James-Large", "ABostrom", "TonyBagnall"]
+__all__ = ["BaseDeepClassifier"]
+
+from abc import ABC, abstractmethod
+
+import numpy as np
+from sklearn.utils import check_random_state
+
+from sktime.classification.base import BaseClassifier
+from baseest import BaseDeepEstimator
+
+
+class BaseDeepClassifier(BaseClassifier, ABC, BaseDeepEstimator):
+    """Abstract base class for deep learning time series classifiers.
+
+    The base classifier provides a deep learning default method for
+    _predict and _predict_proba, and provides a new abstract method for
+    building a model.
+
+    Parameters
+    ----------
+    batch_size : int, default = 40
+        training batch size for the model
+
+    Attributes
+    ----------
+    model_ : the fitted keras model, None before fit is called
+    """
+
+    _tags = {
+        "X_inner_mtype": "numpy3D",
+        "capability:multivariate": True,
+        "python_dependencies": "tensorflow",
+    }
+
+    def __init__(self, batch_size=40, random_state=None):
+        super(BaseDeepClassifier, self).__init__()
+
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self.model_ = None
+
+    @abstractmethod
+    def build_model(self, input_shape, n_classes, **kwargs):
+        """Construct a compiled, un-trained, keras model that is ready for training.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            The shape of the data fed into the input layer
+        n_classes: int
+            The number of classes, which shall become the size of the output
+            layer
+
+        Returns
+        -------
+        A compiled Keras Model
+        """
+        ...
+
+    def _predict(self, X, **kwargs):
+        probs = self._predict_proba(X, **kwargs)
+        rng = check_random_state(self.random_state)
+        # break ties between equally likely classes at random
+        return np.array(
+            [
+                self.classes_[int(rng.choice(np.flatnonzero(prob == prob.max())))]
+                for prob in probs
+            ]
+        )
+
+    def _predict_proba(self, X, **kwargs):
+        """Find probability estimates for each class for all cases in X.
+
+        Parameters
+        ----------
+        X : an np.ndarray of shape = (n_instances, n_dimensions, series_length)
+            The input samples.
+
+        Returns
+        -------
+        output : array of shape = [n_instances, n_classes] of probabilities
+        """
+        # Transpose to work correctly with keras
+        X = X.transpose((0, 2, 1))
+        probs = self.model_.predict(X, self.batch_size, **kwargs)
+
+        # check if binary classification
+        if probs.shape[1] == 1:
+            # first column is probability of class 0 and second is of class 1
+            probs = np.hstack([1 - probs, probs])
+        probs = probs / probs.sum(axis=1, keepdims=1)
+        return probs
diff --git a/steps/19_base_dl_class/Solution_2/basedeepreg.py b/steps/19_base_dl_class/Solution_2/basedeepreg.py
new file mode 100644
index 0000000..6044db9
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_2/basedeepreg.py
@@ -0,0 +1,72 @@
+from abc import ABC, abstractmethod
+
+import numpy as np
+
+from sktime.regression.base import BaseRegressor
+from baseest import BaseDeepEstimator
+
+
+class BaseDeepRegressor(BaseRegressor, ABC, BaseDeepEstimator):
+    """Abstract base class for deep learning time series regression.
+
+    The base regressor provides a deep learning default method for
+    _predict, and provides a new abstract method for building a
+    model.
+
+    Parameters
+    ----------
+    batch_size : int, default = 40
+        training batch size for the model
+
+    Attributes
+    ----------
+    model_ : the fitted keras model, None before fit is called
+    """
+
+    _tags = {
+        "X_inner_mtype": "numpy3D",
+        "capability:multivariate": True,
+        "python_dependencies": "tensorflow",
+    }
+
+    def __init__(self, batch_size=40):
+        super(BaseDeepRegressor, self).__init__()
+
+        self.batch_size = batch_size
+        self.model_ = None
+
+    @abstractmethod
+    def build_model(self, input_shape, **kwargs):
+        """
+        Construct a compiled, un-trained, keras model that is ready for training.
+
+        Parameters
+        ----------
+        input_shape : tuple
+            The shape of the data fed into the input layer
+
+        Returns
+        -------
+        A compiled Keras Model
+        """
+        ...
+
+    def _predict(self, X, **kwargs):
+        """
+        Find regression estimate for all cases in X.
+
+        Parameters
+        ----------
+        X : an np.ndarray of shape = (n_instances, n_dimensions, series_length)
+            The input samples.
+
+        Returns
+        -------
+        predictions : 1d numpy array
+            array of predictions of each instance
+        """
+        X = X.transpose((0, 2, 1))
+        y_pred = self.model_.predict(X, self.batch_size, **kwargs)
+        y_pred = np.squeeze(y_pred, axis=-1)
+        return y_pred
diff --git a/steps/19_base_dl_class/Solution_2/baseest.py b/steps/19_base_dl_class/Solution_2/baseest.py
new file mode 100644
index 0000000..eed2a23
--- /dev/null
+++ b/steps/19_base_dl_class/Solution_2/baseest.py
@@ -0,0 +1,23 @@
+from sklearn.preprocessing import LabelEncoder, OneHotEncoder
+
+from sktime.base import BaseEstimator
+
+
+class BaseDeepEstimator(BaseEstimator):
+
+    def __init__(self, batch_size=40, random_state=None):
+
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self.model_ = None
+
+    def summary(self):
+        return self.history.history
+
+    def convert_y_to_keras(self, y):
+        # encode labels as consecutive integers, then one-hot encode them
+        self.label_encoder = LabelEncoder()
+        y = self.label_encoder.fit_transform(y)
+        self.classes_ = self.label_encoder.classes_
+        self.n_classes_ = len(self.classes_)
+        y = y.reshape(len(y), 1)
+        self.onehot_encoder = OneHotEncoder(sparse=False, categories="auto")
+        # categories='auto' to get rid of FutureWarning
+        y = self.onehot_encoder.fit_transform(y)
+        return y
diff --git a/steps/19_base_dl_class/step.md b/steps/19_base_dl_class/step.md
new file mode 100644
index 0000000..f556fce
--- /dev/null
+++ b/steps/19_base_dl_class/step.md
@@ -0,0 +1,269 @@
+# BaseDeepNetwork
+
+Contributors: ['AurumnPegasus']
+
+## Introduction
+
+Each DL estimator primarily has two parts: a network and an estimator. The network is the class containing the core keras code for the DL estimator, where we build the main keras network. The estimator class is simply a layer of abstraction that lets the user interact with the keras network easily.
+Across different DL estimators, the main thing that changes is the `network` (most of the abstractions are structurally the same). Hence, having a common base class for DL models makes sense, as it reduces code redundancy and allows for a common testing interface.
+
+For preliminary discussions of the proposal presented here, see issue: [#3190](https://github.com/alan-turing-institute/sktime/issues/3190)
+
+## Contents
+
+[TOC]
+
+## Problem statement
+
+The current implementation considers `Network` and `Estimator` to be two different entities, which interact with each other via objects. For example, as shown in the figure below: if I were to create a `CNNClassifier`, it creates an object of `CNNNetwork` within it, from which I call the `build_network` function to get the keras network to be used in `CNNClassifier`. A stripped-down sketch of this interaction is shown below.
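+
+The sketch is illustrative only, not the actual sktime classes: hyper-parameters are hard-coded, validation is omitted, and the training call is left out for brevity.
+
+```python
+import numpy as np
+from tensorflow import keras
+
+
+class CNNNetwork:
+    """Network part: owns the core keras code."""
+
+    def build_network(self, input_shape):
+        input_layer = keras.layers.Input(input_shape)
+        conv = keras.layers.Conv1D(filters=6, kernel_size=7, activation="sigmoid")(input_layer)
+        return input_layer, keras.layers.Flatten()(conv)
+
+
+class CNNClassifier:
+    """Estimator part: the user-facing wrapper around the network."""
+
+    def __init__(self):
+        # composition: the estimator creates and owns a network object
+        self._network = CNNNetwork()
+
+    def fit(self, X, y):
+        # X is assumed to be (n_instances, series_length, n_dimensions) here
+        input_layer, output_layer = self._network.build_network(X.shape[1:])
+        output_layer = keras.layers.Dense(len(np.unique(y)), activation="softmax")(output_layer)
+        self.model_ = keras.models.Model(inputs=input_layer, outputs=output_layer)
+        self.model_.compile(loss="categorical_crossentropy", optimizer="adam")
+        # the actual training step (self.model_.fit) is omitted in this sketch
+        return self
+```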
+Within the estimator, if I want to create a `CNNClassifier`, I inherit from `BaseDeepClassifier` (which contains many functions common to all DL classifiers). The issue with this is that there will also exist a `BaseDeepRegressor` and a `BaseDeepForecaster`, specific to regressors and forecasters respectively. This leads to a lot of redundant code, since many functionalities are the same across all DL estimators.
+Hence, we need to design a `BaseDeepClass`, which will contain all the code and structure that needs to be inherited by every DL estimator.
+
+### Current Implementation
+
+![](https://i.imgur.com/1x2IjJv.png)
+
+- Legend:
+    - Green: `sktime` Base Class
+    - Blue: `sktime` child classes
+    - Red: `sklearn` Base Class
+
+`BaseDeepNetwork` is a base class for creating keras networks. Each specific neural network is a child of `BaseDeepNetwork`. For example, `CNNNetwork`, `CNTCNetwork`, `LSTMNetwork` etc. would be classes inheriting from `BaseDeepNetwork`, each with a single function called `build_network` (which creates the keras layers and returns the input and output layers).
+
+```python
+class BaseDeepNetwork(BaseObject, ABC):
+
+    @abstractmethod
+    def build_network(self, input_shape, **kwargs):
+        # Creates the keras network and returns its input and output layers
+        ...
+```
+
+For estimators, there exist specific `BaseDeepClassifier` and `BaseDeepRegressor` classes, inheriting from `BaseClassifier` and `BaseRegressor` respectively.
+Specific estimators like `CNNClassifier` inherit from `BaseDeepClassifier` and, within the `__init__` method, create an object of the class `CNNNetwork`. Then, in the fit method, the estimator gets the respective keras neural network by calling the `build_network` method on the `CNNNetwork` object.
+
+```python
+class CNNClassifier(BaseDeepClassifier):
+    def __init__(self):
+        # creates object of network class
+        self._network = CNNNetwork()
+
+    def build_model(self):
+        # gets network from created object of the class
+        input_layer, output_layer = self._network.build_network()
+
+        # additional dense layer on top of output layer
+        output_layer = keras.layers.Dense()(output_layer)
+
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+        model.compile()
+        return model
+
+    def _fit(self, X, y):
+        # gets the compiled model here
+        self.model_ = self.build_model()
+```
+
+### Problems
+
+1. Repeated code across `BaseDeepClassifier` and `BaseDeepRegressor`
+    - Since there is no common `BaseDeepClass` (and `BaseDeepNetwork` relates only to the networks), several functions are repeated across everything
+    - `build_model`: the same across all classifiers and regressors (the only difference is `n_classes` for classifiers, whose value is 1 for regressors; since it is a parameter, it can be considered the same)
+    - `fit`: almost the same across all classifiers and across all regressors
+    - `save`: once merged with main ([#3128](https://github.com/alan-turing-institute/sktime/pull/3128)), this function will be common as well.
+2. No dedicated testing suite for deep learning networks:
+    - One of the ideas discussed was to introduce tests specific to DL networks
+    - Test to see if a saved and re-loaded model gives the same answer
+    - Writing pytest tests with different parameters (currently, AFAIK, DL models aren't tested with different parameters at all)
+    - Test to see if the loss is decreasing over epochs
+
+## Alternative Solution 1
+
+Let's say I want to create a `CNNClassifier`. The first step here would be to define `CNNNetwork` and `BaseDeepNetwork` with their structure.
+
+![](https://i.imgur.com/4uRJrYr.png)
+
+---
+
+In this solution, we use `BaseDeepNetwork` as a base class in which we define all functionality that is similar across all keras networks (e.g. save, load, etc.).
+
+So our `BaseDeepNetwork` would end up looking like this (the fully defined code can be seen in `Solution_1/basedeepnetwork.py`):
+
+```python
+class BaseDeepNetwork(BaseObject, ABC):
+
+    @abstractmethod
+    def build_network(self, input_shape, **kwargs):
+        ...
+
+    def build_model(self, input_shape, n_classes, **kwargs):
+        input_layer, output_layer = self.build_network(input_shape, **kwargs)
+
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+
+        model.compile(
+            loss=self.loss,
+            optimizer=self.optimizer_,
+            metrics=metrics,
+        )
+        return model
+```
+
+Here, I am defining `BaseDeepNetwork` in a way that provides the basic structure for any specific network down the line. For example, if I now want to create `CNNNetwork`, I just need to override the `build_network` function of `BaseDeepNetwork` (since the other functions relevant to the keras network have already been defined). (The fully defined code can be seen in `Solution_1/cnnnetwork.py`.)
+
+```python
+class CNNNetwork(BaseDeepNetwork):
+    def __init__(
+        self,
+    ):
+        pass
+
+    def build_network(self, input_shape, **kwargs):
+        conv = keras.layers.Conv1D(
+            filters=self.filter_sizes[0],
+            kernel_size=self.kernel_size,
+            padding=padding,
+            activation=self.activation,
+        )(input_layer)
+        conv = keras.layers.AveragePooling1D(pool_size=self.avg_pool_size)(conv)
+
+        flatten_layer = keras.layers.Flatten()(conv)
+
+        return input_layer, flatten_layer
+```
+---
+
+Now, once we have created the structure for the `Network`, we need to integrate it with the `Estimators`. Let's say I want to create `CNNClassifier` and `CNNRegressor`:
+
+The structure of `BaseDeepClassifier` (`BaseDeepRegressor` will be similar) is slightly different now. Earlier, `BaseDeepClassifier` held all the functions and code related to both DL classifiers and DL networks; now the network-related code lives in `BaseDeepNetwork`, so `BaseDeepClassifier` only has code specific to DL classifiers. (The complete code is in `Solution_1/basedeepclass.py`.)
+
+```python
+class BaseDeepClassifier(BaseClassifier):
+
+    def __init__(self, batch_size=40, random_state=None):
+        super(BaseDeepClassifier, self).__init__()
+
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self.model_ = None
+
+    def summary(self):
+        return self.history.history
+
+    def _predict(self, X, **kwargs):
+        pass
+
+    def _predict_proba(self, X, **kwargs):
+        pass
+```
+
+Previously, we would have put the functions related to `save` or `load` in `BaseDeepClassifier`, but since they are specific to the keras network, they are now left out of `BaseDeepClassifier`.
+
+To see how we integrate `CNNNetwork` with `CNNClassifier`, we define:
+
+```python
+class CNNClassifier(BaseDeepClassifier, CNNNetwork):
+
+    def __init__(
+        self,
+        args
+    ):
+        _check_dl_dependencies(severity="error")
+        super(CNNClassifier, self).__init__()
+
+    def _fit(self, X, y):
+        self.model_ = self.build_model(self.input_shape, self.n_classes_)
+        self.history = self.model_.fit(
+            X,
+            y_onehot,
+            args
+        )
+        return self
+```
+
+The complete version of this code is written in `Solution_1/cnnclass.py`; a short usage sketch is shown below.
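+
+The following usage sketch mirrors the `__main__` block in `Solution_1/cnnclass.py`; the only change is that `n_epochs` is lowered from its default of 2000 so the example runs quickly.
+
+```python
+from sktime.datasets import load_unit_test
+
+from cnnclass import CNNClassifier  # Solution_1/cnnclass.py
+
+X_train, y_train = load_unit_test(split="train", return_X_y=True)
+X_test, y_test = load_unit_test(split="test", return_X_y=True)
+
+clf = CNNClassifier(n_epochs=50)  # fewer epochs than the default, for speed
+clf.fit(X_train, y_train)  # builds and trains the model via the inherited build_model
+print(clf.predict(X_test))
+print(y_test)
+```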
+The difference from the original structure is that here we inherit from both `BaseDeepClassifier` (to preserve the structure required of classifiers, plus some functions specific to DL classifiers) and `CNNNetwork` (to preserve the structure required of all DL networks).
+
+---
+
+### Steps to create a new classifier (XClassifier)
+
+1. Create an `XNetwork` class, which inherits from `BaseDeepNetwork`
+2. Create an `XClassifier` class, which inherits from both `XNetwork` and `BaseDeepClassifier`
+
+Pros:
+1. `CNNNetwork` becomes tightly integrated with the appropriate classifiers
+2. There are 2 different base classes for DL estimators:
+    a. `BaseDeepNetwork`: (parent of `CNNNetwork`) more a base class for the networks themselves than for the estimators. A lot of code that is similar for all networks can be kept here, for example: summary, save.
+    b. `BaseDeepClassifier`: code which is specific to all DL classifiers can be kept here
+
+## Alternative Solution 2
+
+Let's say I want to create a `CNNClassifier`. In this solution, I do not propose any significant changes to the structure of `BaseDeepNetwork` or `CNNNetwork`; they largely remain the same.
+
+---
+
+To get around the problem of writing network-specific functions in all the `BaseDeep` classes, we create a parent `BaseDeepEstimator` (the complete idea is in `Solution_2/baseest.py`):
+
+```python
+class BaseDeepEstimator(BaseEstimator):
+
+    def __init__(self, batch_size=40, random_state=None):
+
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self.model_ = None
+
+    def summary(self):
+        return self.history.history
+
+    def convert_y_to_keras(self, y):
+        pass
+
+    def save(self, path):
+        pass
+```
+
+`BaseDeepEstimator` holds all the network-specific code that has to be present in every DL estimator. The whole idea is to use `BaseDeepNetwork` as a separate entity just for the creation of `CNNNetwork`, and to take care of all the code redundancy in `BaseDeepEstimator`.
+
+Based on this, we re-define our `BaseDeepClassifier` to inherit the classifier functions/structure from `BaseClassifier` and the DL network functions/structure from `BaseDeepEstimator`. The complete idea is in `Solution_2/basedeepclass.py`:
+
+```python
+class BaseDeepClassifier(BaseClassifier, ABC, BaseDeepEstimator):
+
+    def __init__(self, batch_size=40, random_state=None):
+        super(BaseDeepClassifier, self).__init__()
+
+        self.batch_size = batch_size
+        self.random_state = random_state
+        self.model_ = None
+
+    @abstractmethod
+    def build_model(self, input_shape, n_classes, **kwargs):
+        ...
+
+    def _predict(self, X, **kwargs):
+        pass
+
+    def _predict_proba(self, X, **kwargs):
+        pass
+```
+
+Here, the `CNNClassifier` largely remains unchanged as well, since its structure doesn't go through a big change.
+
+### Steps to Implement a New Classifier (XClassifier)
+
+1. Create an `XNetwork` class, which inherits from `BaseDeepNetwork`
+2. Create the `XClassifier` class, which inherits from `BaseDeepClassifier`
+3. Create an object of `XNetwork` in the `__init__` of `XClassifier`, which is then used in the `build_model` function of `XClassifier` to get the keras network (a sketch of this workflow is given after the pros list below).
+
+These are the same steps used to create a new classifier in the current implementation.
+
+---
+
+Pros:
+1. Nothing has to change for any of the existing regressors or classifiers.
+2. The minimal changes required make it the easiest to implement, with no real complications.
+3. The existing issue of re-writing code across `BaseDeepClassifier` and `BaseDeepRegressor` (and, in the future, `BaseDeepForecaster`) is solved by the common parent `BaseDeepEstimator`.
+
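+To illustrate the Solution 2 workflow, a hypothetical `XClassifier` following the three steps above might look like the sketch below. `XNetwork`, `XClassifier` and the dense layers are placeholders, and the imports assume the `Solution_2` module layout with the unchanged `BaseDeepNetwork` available alongside it.
+
+```python
+from tensorflow import keras
+
+from basedeepclass import BaseDeepClassifier
+from basedeepnetwork import BaseDeepNetwork
+
+
+class XNetwork(BaseDeepNetwork):
+    # step 1: a network class owning the core keras code
+    def build_network(self, input_shape, **kwargs):
+        input_layer = keras.layers.Input(input_shape)
+        hidden = keras.layers.Dense(16, activation="relu")(input_layer)  # placeholder architecture
+        return input_layer, keras.layers.Flatten()(hidden)
+
+
+class XClassifier(BaseDeepClassifier):
+    # step 2: an estimator class inheriting only from BaseDeepClassifier
+    def __init__(self, batch_size=40, random_state=None):
+        super(XClassifier, self).__init__(
+            batch_size=batch_size, random_state=random_state
+        )
+        # step 3: the estimator creates and owns a network object
+        self._network = XNetwork()
+
+    def build_model(self, input_shape, n_classes, **kwargs):
+        input_layer, output_layer = self._network.build_network(input_shape, **kwargs)
+        output_layer = keras.layers.Dense(n_classes, activation="softmax")(output_layer)
+        model = keras.models.Model(inputs=input_layer, outputs=output_layer)
+        model.compile(loss="categorical_crossentropy", optimizer="adam")
+        return model
+
+    def _fit(self, X, y):
+        y_onehot = self.convert_y_to_keras(y)  # inherited from BaseDeepEstimator
+        X = X.transpose(0, 2, 1)  # transpose to conform to keras input style
+        self.model_ = self.build_model(X.shape[1:], self.n_classes_)
+        self.history = self.model_.fit(X, y_onehot, batch_size=self.batch_size)
+        return self
+```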