diff --git a/config.py b/config.py index 4ad5b53..1eb8df4 100644 --- a/config.py +++ b/config.py @@ -2,38 +2,40 @@ from orca_python.utilities import Utilities -ex = Experiment('Experiment Configuration') +ex = Experiment("Experiment Configuration") + @ex.config def default_config(): + # Giving default values + general_conf = { + "basedir": "", + "dataset": "", + "input_preprocessing": "", + "hyperparam_cv_nfolds": 3, + "jobs": 1, + "metrics": "mae", + "cv_metric": "mae", + "output_folder": "my_runs/", + } - # Giving default values - general_conf = {"basedir": "", - "dataset": "", - "input_preprocessing": "", - "hyperparam_cv_nfolds": 3, - "jobs": 1, - "metrics": "ccr", - "cv_metric": "ccr", - "output_folder": "my_runs/" - } + configurations = {} - configurations = {} @ex.automain def main(general_conf, configurations): - - if not general_conf['basedir'] or not general_conf['datasets']: - - raise RuntimeError('A dataset has to be defined to run this program.\n' + - 'For more information about using this framework, please refer to the README.') - - if not configurations: - - raise RuntimeError('No configuration was defined.\n' + - 'For more information about using this framework, please refer to the README.') - - - interface = Utilities(general_conf, configurations) - interface.run_experiment() - interface.write_report() + if not general_conf["basedir"] or not general_conf["datasets"]: + raise RuntimeError( + "A dataset has to be defined to run this program.\n" + + "For more information about using this framework, please refer to the README." + ) + + if not configurations: + raise RuntimeError( + "No configuration was defined.\n" + + "For more information about using this framework, please refer to the README." + ) + + interface = Utilities(general_conf, configurations) + interface.run_experiment() + interface.write_report() diff --git a/orca_python/classifiers/CSSVC.py b/orca_python/classifiers/CSSVC.py new file mode 100644 index 0000000..4dad37d --- /dev/null +++ b/orca_python/classifiers/CSSVC.py @@ -0,0 +1,109 @@ +# encoding: utf-8 +import numpy as np +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_X_y, check_array, check_is_fitted +from sklearn.svm import SVC + + +class CSSVC(BaseEstimator, ClassifierMixin): + def __init__( + self, + kernel="rbf", + degree=3, + gamma=1, + coef0=0, + C=1, + cache_size=200, + tol=1e-3, + shrinking=True, + probability_estimates=False, + weight=None, + random_state=None, + ): + self.kernel = kernel + self.degree = degree + self.gamma = gamma + self.coef0 = coef0 + self.C = C + self.cache_size = cache_size + self.tol = tol + self.shrinking = shrinking + self.probability_estimates = probability_estimates + self.weight = weight + self.random_state = random_state + + self.models_ = [] + + def fit(self, X, y): + """ + Fit the model with the training data + Parameters + ---------- + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features + y: array-like, shape (n_samples) + Target vector relative to X + + p: Label of the pattern which is choose for 1vsALL + Returns + ------- + self: object + """ + X, y = check_X_y(X, y) + + for c in np.unique(y): + patterns_class = np.where(y == c, 1, 0) + + self.classifier_ = SVC( + C=self.C, + kernel=self.kernel, + degree=self.degree, + gamma=self.gamma, + coef0=self.coef0, + shrinking=self.shrinking, + probability=self.probability_estimates, + tol=self.tol, + 
cache_size=self.cache_size, + class_weight=self.weight, + random_state=self.random_state, + ) + + w = self.ordinalWeights(c, y) + self.models_.append( + self.classifier_.fit(X, patterns_class, sample_weight=w) + ) + return self + + def predict(self, X): + """ + Performs classification on samples in X + Parameters + ---------- + X : {array-like, sparse matrix}, shape (n_samples, n_features) + Returns + ------- + predicted_y : array, shape (n_samples, n_samples) + Class labels for samples in X. + """ + check_is_fitted(self, "models_") + + decfuncs = np.zeros((len(X), len(self.models_))) + + X = check_array(X) + + for idx, model in enumerate(self.models_): + decfuncs[:, idx] = model.decision_function(X) + + preds = np.argmax(decfuncs, axis=1) + 1 + + return preds + + def ordinalWeights(self, p, targets): + w = np.ones(len(targets)) + w[targets != p] = ( + (abs(p - targets[targets != p]) + 1) + * len(targets[targets != p]) + / sum(abs(p - targets[targets != p]) + 1) + ) + return w diff --git a/orca_python/classifiers/NNOP.py b/orca_python/classifiers/NNOP.py index 18ff5a7..3297ef6 100644 --- a/orca_python/classifiers/NNOP.py +++ b/orca_python/classifiers/NNOP.py @@ -1,541 +1,552 @@ # encoding: utf-8 import numpy as np -import math as math from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import unique_labels import scipy + class NNOP(BaseEstimator, ClassifierMixin): - - """ - - NNOP Neural Network with Ordered Partitions (NNOP). This model - considers the OrderedPartitions coding scheme for the labels and a - rule for decisions based on the first node whose output is higher - than a predefined threshold (T=0.5, in our experiments). The - model has one hidden layer with hiddenN neurons and one outputlayer - with as many neurons as the number of classes minus one. The learning - is based on iRProp+ algorithm and the implementation provided by - Roberto Calandra in his toolbox Rprop Toolbox for {MATLAB}: - http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox - The model is adjusted by minimizing mean squared error. A regularization - parameter "lambda" is included based on L2, and the number of - iterations is specified by the "iterations" parameter. - - NNOP public methods: - fit - Fits a model from training data - predict - Performs label prediction - - References: - [1] J. Cheng, Z. Wang, and G. Pollastri, "A neural network - approach to ordinal regression," in Proc. IEEE Int. Joint - Conf. Neural Netw. (IEEE World Congr. Comput. Intell.), 2008, - pp. 1279-1284. - [2] P.A. Gutiérrez, M. Pérez-Ortiz, J. Sánchez-Monedero, - F. Fernández-Navarro and C. Hervás-Martínez - Ordinal regression methods: survey and experimental study - IEEE Transactions on Knowledge and Data Engineering, Vol. 28. - Issue 1, 2016 - http://dx.doi.org/10.1109/TKDE.2015.2457911 - - This file is part of ORCA: https://github.com/ayrna/orca - Original authors: Pedro Antonio Gutiérrez, María Pérez Ortiz, Javier Sánchez Monedero - Citation: If you use this code, please cite the associated paper http://www.uco.es/grupos/ayrna/orreview - Copyright: - This software is released under the The GNU General Public License v3.0 licence - available at http://www.gnu.org/licenses/gpl-3.0.html - - NNOP properties: - epsilonInit - Range for initializing the weights. - hiddenN - Number of hidden neurons of the - model. - iterations - Number of iterations for fmin_l_bfgs_b - algorithm. 
- lambdaValue - Regularization parameter. - theta1 - Hidden layer weigths (with bias). - theta2 - Output layer weigths. - num_labels - Number of labels in the problem. - m - Number of samples of X (train patterns array). - - """ - - # Constructor of class NNOP (set parameters values). - def __init__(self, epsilonInit=0.5, hiddenN=50, iterations=500, lambdaValue=0.01): - - self.epsilonInit = epsilonInit - self.hiddenN = hiddenN - self.iterations = iterations - self.lambdaValue = lambdaValue - - - #--------Main functions (Public Access)-------- - - - def fit(self,X,y): - - """ - - Trains the model for the model NNOP method with TRAIN data. - Returns the projection of patterns (only valid for threshold models) and the predicted labels. - - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - Training patterns array, where n_samples is the number of samples - and n_features is the number of features - - y: array-like, shape (n_samples) - Target vector relative to X - - Returns - ------- - - self: The object NNOP. - - """ - if self.epsilonInit < 0 or self.hiddenN < 1 or self.iterations < 1 or self.lambdaValue < 0: - return None - - - # Check that X and y have correct shape - X, y = check_X_y(X, y) - # Store the classes seen during fit - self.classes_ = unique_labels(y) - - # Aux variables - y = y[:,np.newaxis] - input_layer_size = X.shape[1] - num_labels = np.size(np.unique(y)) - m = X.shape[0] - - # Recode y to Y using ordinalPartitions coding - Y = 1 * (np.tile(y, (1,num_labels)) <= np.tile(np.arange(1,num_labels+1)[np.newaxis,:], (m,1))) - - # Hidden layer weigths (with bias) - initial_Theta1 = self.__randInitializeWeights(input_layer_size+1, self.getHiddenN()) - # Output layer weigths - initial_Theta2 = self.__randInitializeWeights(self.getHiddenN()+1, num_labels-1) - - # Pack parameters - initial_nn_params = np.concatenate((initial_Theta1.flatten(order='F'), - initial_Theta2.flatten(order='F')), axis=0)[:,np.newaxis] - - results_optimization = scipy.optimize.fmin_l_bfgs_b(func=self.__nnOPCostFunction, x0=initial_nn_params.ravel(),args=(input_layer_size, self.hiddenN, - num_labels, X, Y, self.lambdaValue), fprime=None, factr=1e3, maxiter=self.iterations,iprint=-1) - - self.nn_params = results_optimization[0] - # Unpack the parameters - Theta1, Theta2 = self.__unpackParameters(self.nn_params, input_layer_size, self.getHiddenN(), num_labels) - self.theta1 = Theta1 - self.theta2 = Theta2 - self.num_labels = num_labels - self.m = m - - return self - - def predict (self, test): - - """ - Predicts labels of TEST patterns labels. The object needs to be fitted to the data first. + """ + + NNOP Neural Network with Ordered Partitions (NNOP). This model + considers the OrderedPartitions coding scheme for the labels and a + rule for decisions based on the first node whose output is higher + than a predefined threshold (T=0.5, in our experiments). The + model has one hidden layer with hiddenN neurons and one outputlayer + with as many neurons as the number of classes minus one. The learning + is based on iRProp+ algorithm and the implementation provided by + Roberto Calandra in his toolbox Rprop Toolbox for {MATLAB}: + http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox + The model is adjusted by minimizing mean squared error. A regularization + parameter "lambda" is included based on L2, and the number of + iterations is specified by the "iterations" parameter. 
+ + NNOP public methods: + fit - Fits a model from training data + predict - Performs label prediction + + References: + [1] J. Cheng, Z. Wang, and G. Pollastri, "A neural network + approach to ordinal regression," in Proc. IEEE Int. Joint + Conf. Neural Netw. (IEEE World Congr. Comput. Intell.), 2008, + pp. 1279-1284. + [2] P.A. Gutiérrez, M. Pérez-Ortiz, J. Sánchez-Monedero, + F. Fernández-Navarro and C. Hervás-Martínez + Ordinal regression methods: survey and experimental study + IEEE Transactions on Knowledge and Data Engineering, Vol. 28. + Issue 1, 2016 + http://dx.doi.org/10.1109/TKDE.2015.2457911 + + This file is part of ORCA: https://github.com/ayrna/orca + Original authors: Pedro Antonio Gutiérrez, María Pérez Ortiz, Javier Sánchez Monedero + Citation: If you use this code, please cite the associated paper http://www.uco.es/grupos/ayrna/orreview + Copyright: + This software is released under the The GNU General Public License v3.0 licence + available at http://www.gnu.org/licenses/gpl-3.0.html + + NNOP properties: + epsilonInit - Range for initializing the weights. + hiddenN - Number of hidden neurons of the + model. + iterations - Number of iterations for fmin_l_bfgs_b + algorithm. + lambdaValue - Regularization parameter. + theta1 - Hidden layer weigths (with bias). + theta2 - Output layer weigths. + num_labels - Number of labels in the problem. + m - Number of samples of X (train patterns array). + + """ + + # Constructor of class NNOP (set parameters values). + def __init__(self, epsilonInit=0.5, hiddenN=50, iterations=500, lambdaValue=0.01): + self.epsilonInit = epsilonInit + self.hiddenN = hiddenN + self.iterations = iterations + self.lambdaValue = lambdaValue + + # --------Main functions (Public Access)-------- + + def fit(self, X, y): + """ + + Trains the model for the model NNOP method with TRAIN data. + Returns the projection of patterns (only valid for threshold models) and the predicted labels. + + Parameters + ---------- + + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features + + y: array-like, shape (n_samples) + Target vector relative to X + + Returns + ------- + + self: The object NNOP. 
+ + """ + if ( + self.epsilonInit < 0 + or self.hiddenN < 1 + or self.iterations < 1 + or self.lambdaValue < 0 + ): + return None + + # Check that X and y have correct shape + X, y = check_X_y(X, y) + # Store the classes seen during fit + self.classes_ = unique_labels(y) + + # Aux variables + y = y[:, np.newaxis] + input_layer_size = X.shape[1] + num_labels = np.size(np.unique(y)) + m = X.shape[0] + + # Recode y to Y using ordinalPartitions coding + Y = 1 * ( + np.tile(y, (1, num_labels)) + <= np.tile(np.arange(1, num_labels + 1)[np.newaxis, :], (m, 1)) + ) + + # Hidden layer weigths (with bias) + initial_Theta1 = self.__randInitializeWeights( + input_layer_size + 1, self.getHiddenN() + ) + # Output layer weigths + initial_Theta2 = self.__randInitializeWeights( + self.getHiddenN() + 1, num_labels - 1 + ) + + # Pack parameters + initial_nn_params = np.concatenate( + (initial_Theta1.flatten(order="F"), initial_Theta2.flatten(order="F")), + axis=0, + )[:, np.newaxis] + + results_optimization = scipy.optimize.fmin_l_bfgs_b( + func=self.__nnOPCostFunction, + x0=initial_nn_params.ravel(), + args=(input_layer_size, self.hiddenN, num_labels, X, Y, self.lambdaValue), + fprime=None, + factr=1e3, + maxiter=self.iterations, + iprint=-1, + ) + + self.nn_params = results_optimization[0] + # Unpack the parameters + Theta1, Theta2 = self.__unpackParameters( + self.nn_params, input_layer_size, self.getHiddenN(), num_labels + ) + self.theta1 = Theta1 + self.theta2 = Theta2 + self.num_labels = num_labels + self.m = m + + return self + + def predict(self, test): + """ + + Predicts labels of TEST patterns labels. The object needs to be fitted to the data first. + + Parameters + ---------- + + test: {array-like, sparse matrix}, shape (n_samples, n_features) + test patterns array, where n_samples is the number of samples + and n_features is the number of features + + Returns + ------- + + predicted: {array-like, sparse matrix}, shape (n_samples,) + Vector array with predicted values for each pattern of test patterns. + + """ + # Check is fit had been called + check_is_fitted(self) + + # Input validation + test = check_array(test) + m = test.shape[0] + + a1 = np.append(np.ones((m, 1)), test, axis=1) + z2 = np.append(np.ones((m, 1)), np.matmul(a1, self.theta1.T), axis=1) + + a2 = 1.0 / (1.0 + np.exp(-z2)) + projected = np.matmul(a2, self.theta2.T) + projected = 1.0 / (1.0 + np.exp(-projected)) + + a3 = np.multiply( + np.where(np.append(projected, np.ones((m, 1)), axis=1) > 0.5, 1, 0), + np.tile(np.arange(1, self.num_labels + 1), (m, 1)), + ) + a3[np.where(a3 == 0)] = self.num_labels + 1 + predicted = a3.min(axis=1) + + return predicted + + # --------Getters & Setters (Public Access)-------- + + # Getter & Setter of "epsilonInit" + def getEpsilonInit(self): + """ - Parameters - ---------- + This method returns the value of the variable self.epsilonInit. + self.epsilonInit contains the value of epsilon, which is the initialization range of the weights. - test: {array-like, sparse matrix}, shape (n_samples, n_features) - test patterns array, where n_samples is the number of samples - and n_features is the number of features + """ - Returns - ------- + return self.epsilonInit - predicted: {array-like, sparse matrix}, shape (n_samples,) - Vector array with predicted values for each pattern of test patterns. 
+ def setEpsilonInit(self, epsilonInit): + """ - """ - # Check is fit had been called - check_is_fitted(self) - - # Input validation - test = check_array(test) - m = test.shape[0] + This method modify the value of the variable self.epsilonInit. + This is replaced by the value contained in the epsilonInit variable passed as an argument. - a1 = np.append(np.ones((m, 1)), test, axis=1) - z2 = np.append(np.ones((m,1)), np.matmul(a1, self.theta1.T), axis=1) + """ - a2 = 1.0 / (1.0 + np.exp(-z2)) - projected = np.matmul(a2,self.theta2.T) - projected = 1.0 / (1.0 + np.exp(-projected)) + self.epsilonInit = epsilonInit - a3 = np.multiply(np.where(np.append(projected, np.ones((m,1)), axis=1)>0.5, 1, 0), - np.tile(np.arange(1,self.num_labels+1), (m,1))) - a3[np.where(a3==0)] = self.num_labels + 1 - predicted = a3.min(axis=1) + # Getter & Setter of "hiddenN" + def getHiddenN(self): + """ - return predicted - - #--------Getters & Setters (Public Access)-------- - + This method returns the value of the variable self.hiddenN. + self.hiddenN contains the number of nodes/neurons in the hidden layer. - # Getter & Setter of "epsilonInit" - def getEpsilonInit (self): - - """ + """ - This method returns the value of the variable self.epsilonInit. - self.epsilonInit contains the value of epsilon, which is the initialization range of the weights. + return self.hiddenN - """ + def setHiddenN(self, hiddenN): + """ - return self.epsilonInit + This method modify the value of the variable self.hiddenN. + This is replaced by the value contained in the hiddenN variable passed as an argument. - def setEpsilonInit (self, epsilonInit): + """ - """ + self.hiddenN = hiddenN - This method modify the value of the variable self.epsilonInit. - This is replaced by the value contained in the epsilonInit variable passed as an argument. + # Getter & Setter of "iterations" + def getIterations(self): + """ - """ + This method returns the value of the variable self.iterations. + self.iterations contains the number of iterations. - self.epsilonInit = epsilonInit - + """ - # Getter & Setter of "hiddenN" - def getHiddenN (self): + return self.iterations - """ + def setIterations(self, iterations): + """ - This method returns the value of the variable self.hiddenN. - self.hiddenN contains the number of nodes/neurons in the hidden layer. + This method modify the value of the variable self.iterations. + This is replaced by the value contained in the iterations variable passed as an argument. - """ + """ - return self.hiddenN + self.iterations = iterations - def setHiddenN (self, hiddenN): - - """ + # Getter & Setter of "lambdaValue" + def getLambdaValue(self): + """ - This method modify the value of the variable self.hiddenN. - This is replaced by the value contained in the hiddenN variable passed as an argument. + This method returns the value of the variable self.lambdaValue. + self.lambdaValue contains the Lambda parameter used in regularization. - """ + """ - self.hiddenN = hiddenN - + return self.lambdaValue - # Getter & Setter of "iterations" - def getIterations (self): - - """ + def setLambdaValue(self, lambdaValue): + """ - This method returns the value of the variable self.iterations. - self.iterations contains the number of iterations. + This method modify the value of the variable self.lambdaValue. + This is replaced by the value contained in the lambdaValue variable passed as an argument. 
- """ + """ - return self.iterations - - def setIterations (self, iterations): + self.lambdaValue = lambdaValue - """ + # Getter & Setter of "theta1" + def getTheta1(self): + """ - This method modify the value of the variable self.iterations. - This is replaced by the value contained in the iterations variable passed as an argument. + This method returns the value of the variable self.theta1. + self.theta1 contains an array with the weights of the hidden layer (with biases included). - """ + """ - self.iterations = iterations - + return self.theta1 - # Getter & Setter of "lambdaValue" - def getLambdaValue (self): + def setTheta1(self, theta1): + """ - """ + This method modify the value of the variable self.theta1. + This is replaced by the value contained in the theta1 variable passed as an argument. - This method returns the value of the variable self.lambdaValue. - self.lambdaValue contains the Lambda parameter used in regularization. + """ - """ + self.theta1 = theta1 - return self.lambdaValue - - def setLambdaValue (self, lambdaValue): + # Getter & Setter of "theta2" + def getTheta2(self): + """ - """ + This method returns the value of the variable self.theta2. + self.theta2 contains an array with output layer weigths. - This method modify the value of the variable self.lambdaValue. - This is replaced by the value contained in the lambdaValue variable passed as an argument. + """ - """ + return self.theta2 - self.lambdaValue = lambdaValue + def setTheta2(self, theta2): + """ + This method modify the value of the variable self.theta2. + This is replaced by the value contained in the theta2 variable passed as an argument. - # Getter & Setter of "theta1" - def getTheta1 (self): - - """ + """ - This method returns the value of the variable self.theta1. - self.theta1 contains an array with the weights of the hidden layer (with biases included). + self.theta2 = theta2 - """ + # Getter & Setter of "num_labels" + def getNum_labels(self): + """ - return self.theta1 + This method returns the value of the variable self.num_labels. + self.num_labels contains the number of labels in the problem. - def setTheta1 (self, theta1): - - """ + """ - This method modify the value of the variable self.theta1. - This is replaced by the value contained in the theta1 variable passed as an argument. + return self.num_labels - """ + def setNum_labels(self, num_labels): + """ - self.theta1 = theta1 - + This method modify the value of the variable self.num_labels. + This is replaced by the value contained in the num_labels variable passed as an argument. - # Getter & Setter of "theta2" - def getTheta2 (self): - - """ + """ - This method returns the value of the variable self.theta2. - self.theta2 contains an array with output layer weigths. + self.num_labels = num_labels - """ + # Getter & Setter of "m" + def getM(self): + """ - return self.theta2 - - def setTheta2 (self, theta2): - - """ + This method returns the value of the variable self.m. + self.m contains the number of samples of X (train patterns array). - This method modify the value of the variable self.theta2. - This is replaced by the value contained in the theta2 variable passed as an argument. - - """ + """ - self.theta2 = theta2 + return self.m - # Getter & Setter of "num_labels" - def getNum_labels (self): - - """ + def setM(self, m): + """ - This method returns the value of the variable self.num_labels. - self.num_labels contains the number of labels in the problem. - - """ + This method modify the value of the variable self.m. 
+ This is replaced by the value contained in the m variable passed as an argument. - return self.num_labels - - def setNum_labels (self, num_labels): - - """ + """ - This method modify the value of the variable self.num_labels. - This is replaced by the value contained in the num_labels variable passed as an argument. - - """ + self.m = m - self.num_labels = num_labels + # --------------Private Access functions------------------ + # Download and save the values ​​of Theta1, Theta2 and thresholds_param + # from the nn_params array to their corresponding array + def __unpackParameters( + self, nn_params, input_layer_size, hidden_layer_size, num_labels + ): + """ - # Getter & Setter of "m" - def getM (self): - - """ + This method gets Theta1 and Theta2 back from the whole array nn_params. - This method returns the value of the variable self.m. - self.m contains the number of samples of X (train patterns array). - - """ + Parameters + ---------- - return self.m - - def setM (self, m): - - """ + nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size + + hidden_layer_size + (num_labels-1)) + Array that is a column vector. It stores the values ​​of Theta1, + Theta2 and thresholds_param, all of them together in an array in this order. - This method modify the value of the variable self.m. - This is replaced by the value contained in the m variable passed as an argument. - - """ + input_layer_size: integer + Number of nodes in the input layer of the neural network model. - self.m = m + hidden_layer_size: integer + Number of nodes in the hidden layer of the neural network model. - #--------------Private Access functions------------------ + num_labels: integer + Number of classes. - # Download and save the values ​​of Theta1, Theta2 and thresholds_param - # from the nn_params array to their corresponding array - def __unpackParameters(self, nn_params, input_layer_size, hidden_layer_size, num_labels): - - """ + Returns + ------- - This method gets Theta1 and Theta2 back from the whole array nn_params. + Theta1: The weights between the input layer and the hidden layer (with biases included). - Parameters - ---------- + Theta2: The weights between the hidden layer and the output layer. - nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size - + hidden_layer_size + (num_labels-1)) - Array that is a column vector. It stores the values ​​of Theta1, - Theta2 and thresholds_param, all of them together in an array in this order. + """ - input_layer_size: integer - Number of nodes in the input layer of the neural network model. - - hidden_layer_size: integer - Number of nodes in the hidden layer of the neural network model. - - num_labels: integer - Number of classes. + nTheta1 = hidden_layer_size * (input_layer_size + 1) + Theta1 = np.reshape( + nn_params[0:nTheta1], (hidden_layer_size, (input_layer_size + 1)), order="F" + ) + Theta2 = np.reshape( + nn_params[nTheta1:], (num_labels - 1, hidden_layer_size + 1), order="F" + ) - Returns - ------- + return Theta1, Theta2 - Theta1: The weights between the input layer and the hidden layer (with biases included). + # Randomly initialize the weights of the neural network layer + # by entering the number of input and output nodes of that layer + def __randInitializeWeights(self, L_in, L_out): + """ - Theta2: The weights between the hidden layer and the output layer. 
+ This method randomly initializes the weights of a layer + with L_in incoming connections and L_out outgoing connections - """ + Parameters + ---------- - nTheta1 = hidden_layer_size * (input_layer_size + 1) - Theta1 = np.reshape(nn_params[0:nTheta1],(hidden_layer_size, - (input_layer_size + 1)),order='F') - - Theta2 = np.reshape(nn_params[nTheta1:], (num_labels-1, - hidden_layer_size+1),order='F') - - return Theta1, Theta2 - + L_in: integer + Number of inputs of the layer. - # Randomly initialize the weights of the neural network layer - # by entering the number of input and output nodes of that layer - def __randInitializeWeights(self, L_in, L_out): + L_out: integer + Number of outputs of the layer. - """ + Returns + ------- - This method randomly initializes the weights of a layer - with L_in incoming connections and L_out outgoing connections + W: Array with the weights of each synaptic relationship between nodes. - Parameters - ---------- + """ - L_in: integer - Number of inputs of the layer. + W = ( + np.random.rand(L_out, L_in) * 2 * self.getEpsilonInit() + - self.getEpsilonInit() + ) - L_out: integer - Number of outputs of the layer. - - Returns - ------- + return W - W: Array with the weights of each synaptic relationship between nodes. + # Implements the cost function and obtains the corresponding derivatives. + def __nnOPCostFunction( + self, + nn_params, + input_layer_size, + hidden_layer_size, + num_labels, + X, + Y, + lambdaValue, + ): + """ + This method implements the cost function and obtains + the corresponding derivatives. - """ + Parameters + ---------- - W = np.random.rand(L_out,L_in)*2*self.getEpsilonInit() - self.getEpsilonInit() + nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size + + hidden_layer_size) - return W + Array that is a column vector. It stores the values ​​of Theta1 and + Theta2, all of them together in an array in this order. + input_layer_size: integer + Number of nodes in the input layer of the neural network model. - # Implements the cost function and obtains the corresponding derivatives. - def __nnOPCostFunction(self, nn_params, input_layer_size, hidden_layer_size, - num_labels, X, Y, lambdaValue): - - """ - This method implements the cost function and obtains - the corresponding derivatives. - - Parameters - ---------- + hidden_layer_size: integer + Number of nodes in the hidden layer of the neural network model. - nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size - + hidden_layer_size) - - Array that is a column vector. It stores the values ​​of Theta1 and - Theta2, all of them together in an array in this order. - - input_layer_size: integer - Number of nodes in the input layer of the neural network model. - - hidden_layer_size: integer - Number of nodes in the hidden layer of the neural network model. - - num_labels: integer - Number of classes. + num_labels: integer + Number of classes. - X: {array-like, sparse matrix}, shape (n_samples, n_features) - Training patterns array, where n_samples is the number of samples - and n_features is the number of features + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features - Y: array-like, shape (n_samples) - Target vector relative to X + Y: array-like, shape (n_samples) + Target vector relative to X - lambdaValue: - Regularization parameter. + lambdaValue: + Regularization parameter. 
- Returns - ------- + Returns + ------- - J: Matrix with cost function (updated weight matrix). - grad: Array with the error gradient of each weight of each layer. + J: Matrix with cost function (updated weight matrix). + grad: Array with the error gradient of each weight of each layer. - """ + """ - # Unroll all the parameters - Theta1,Theta2 = self.__unpackParameters(nn_params,input_layer_size, hidden_layer_size, num_labels) + # Unroll all the parameters + Theta1, Theta2 = self.__unpackParameters( + nn_params, input_layer_size, hidden_layer_size, num_labels + ) - # Setup some useful variables - m = np.size(X, 0) + # Setup some useful variables + m = np.size(X, 0) - # Neural Network model - a1 = np.append(np.ones((m, 1)), X, axis=1) - z2 = np.matmul(a1,Theta1.T) - a2 = np.append(np.ones((m, 1)), 1.0 / (1.0 + np.exp(-z2)), axis=1) - z3 = np.matmul(a2,Theta2.T) - h = np.append(1.0 / (1.0 + np.exp(-z3)), np.ones((m, 1)), axis=1) + # Neural Network model + a1 = np.append(np.ones((m, 1)), X, axis=1) + z2 = np.matmul(a1, Theta1.T) + a2 = np.append(np.ones((m, 1)), 1.0 / (1.0 + np.exp(-z2)), axis=1) + z3 = np.matmul(a2, Theta2.T) + h = np.append(1.0 / (1.0 + np.exp(-z3)), np.ones((m, 1)), axis=1) - # Final output - out = h + # Final output + out = h - # Calculate penalty (regularización L2) - p = np.sum((Theta1[:,1:]**2).sum() + (Theta2[:,1:]**2).sum()) + # Calculate penalty (regularización L2) + p = np.sum((Theta1[:, 1:] ** 2).sum() + (Theta2[:, 1:] ** 2).sum()) - # MSE - J = np.sum((out-Y)**2).sum()/(2*m) + lambdaValue*p/(2*m) + # MSE + J = np.sum((out - Y) ** 2).sum() / (2 * m) + lambdaValue * p / (2 * m) - # MSE - errorDer = (out-Y) + # MSE + errorDer = out - Y - # Calculate sigmas - sigma3 = np.multiply(np.multiply(errorDer,h), (1-h)) - sigma3 = sigma3[:,:-1] + # Calculate sigmas + sigma3 = np.multiply(np.multiply(errorDer, h), (1 - h)) + sigma3 = sigma3[:, :-1] - sigma2 = np.multiply(np.multiply(np.matmul(sigma3, Theta2), a2), (1-a2)) - sigma2 = sigma2[:,1:] + sigma2 = np.multiply(np.multiply(np.matmul(sigma3, Theta2), a2), (1 - a2)) + sigma2 = sigma2[:, 1:] - # Accumulate gradients - delta_1 = np.matmul(sigma2.T, a1) - delta_2 = np.matmul(sigma3.T, a2) + # Accumulate gradients + delta_1 = np.matmul(sigma2.T, a1) + delta_2 = np.matmul(sigma3.T, a2) - # Calculate regularized gradient - p1 = (lambdaValue/m) * np.concatenate((np.zeros((np.size(Theta1, axis=0), 1)), Theta1[:,1:]), axis=1) - p2 = (lambdaValue/m) * np.concatenate((np.zeros((np.size(Theta2, axis=0), 1)), Theta2[:,1:]), axis=1) - Theta1_grad = delta_1 / m + p1 - Theta2_grad = delta_2 / m + p2 + # Calculate regularized gradient + p1 = (lambdaValue / m) * np.concatenate( + (np.zeros((np.size(Theta1, axis=0), 1)), Theta1[:, 1:]), axis=1 + ) + p2 = (lambdaValue / m) * np.concatenate( + (np.zeros((np.size(Theta2, axis=0), 1)), Theta2[:, 1:]), axis=1 + ) + Theta1_grad = delta_1 / m + p1 + Theta2_grad = delta_2 / m + p2 - # Unroll gradients - grad = np.concatenate((Theta1_grad.flatten(order='F'), - Theta2_grad.flatten(order='F')),axis=0) + # Unroll gradients + grad = np.concatenate( + (Theta1_grad.flatten(order="F"), Theta2_grad.flatten(order="F")), axis=0 + ) - return J,grad - + return J, grad diff --git a/orca_python/classifiers/NNPOM.py b/orca_python/classifiers/NNPOM.py index 00315df..54b8274 100644 --- a/orca_python/classifiers/NNPOM.py +++ b/orca_python/classifiers/NNPOM.py @@ -1,650 +1,700 @@ # encoding: utf-8 -import numpy as np import math as math + +import numpy as np +import scipy from sklearn.base import BaseEstimator, 
ClassifierMixin -from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import unique_labels -import scipy +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y + class NNPOM(BaseEstimator, ClassifierMixin): - - """ - - NNPOM Neural Network based on Proportional Odd Model (NNPOM). This - class implements a neural network model for ordinal regression. The - model has one hidden layer with hiddenN neurons and one outputlayer - with only one neuron but as many threshold as the number of classes - minus one. The standard POM model is applied in this neuron to have - probabilistic outputs. The learning is based on iRProp+ algorithm and - the implementation provided by Roberto Calandra in his toolbox Rprop - Toolbox for {MATLAB}: - http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox - The model is adjusted by minimizing cross entropy. A regularization - parameter "lambda" is included based on L2, and the number of - iterations is specified by the "iterations" parameter. - - NNPOM public methods: - fit - Fits a model from training data - predict - Performs label prediction - - References: - [1] P. McCullagh, Regression models for ordinal data, Journal of - the Royal Statistical Society. Series B (Methodological), vol. 42, - no. 2, pp. 109–142, 1980. - [2] M. J. Mathieson, Ordinal models for neural networks, in Proc. - 3rd Int. Conf. Neural Netw. Capital Markets, 1996, pp. - 523-536. - [3] P.A. Gutiérrez, M. Pérez-Ortiz, J. Sánchez-Monedero, - F. Fernández-Navarro and C. Hervás-Martínez - Ordinal regression methods: survey and experimental study - IEEE Transactions on Knowledge and Data Engineering, Vol. 28. - Issue 1, 2016 - http://dx.doi.org/10.1109/TKDE.2015.2457911 - - This file is part of ORCA: https://github.com/ayrna/orca - Original authors: Pedro Antonio Gutiérrez, María Pérez Ortiz, Javier Sánchez Monedero - Citation: If you use this code, please cite the associated paper http://www.uco.es/grupos/ayrna/orreview - Copyright: - This software is released under the The GNU General Public License v3.0 licence - available at http://www.gnu.org/licenses/gpl-3.0.html - - - NNPOM properties: - epsilonInit - Range for initializing the weights. - hiddenN - Number of hidden neurons of the - model. - iterations - Number of iterations for fmin_l_bfgs_b - algorithm. - lambdaValue - Regularization parameter. - theta1 - Hidden layer weigths (with bias) - theta2 - Output layer weigths (without bias, the biases will be the thresholds) - thresholds - Class thresholds parameters - num_labels - Number of labels in the problem - m - Number of samples of X (train patterns array). - - """ - - # Constructor of class NNPOM (set parameters values). - def __init__(self, epsilonInit=0.5, hiddenN=50, iterations=500, lambdaValue=0.01): - - self.epsilonInit = epsilonInit - self.hiddenN = hiddenN - self.iterations = iterations - self.lambdaValue = lambdaValue - - - #--------Main functions (Public Access)-------- - - - def fit(self,X,y): - - """ - - Trains the model for the model NNPOM method with TRAIN data. - Returns the projection of patterns (only valid for threshold models) and the predicted labels. - - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - Training patterns array, where n_samples is the number of samples - and n_features is the number of features - - y: array-like, shape (n_samples) - Target vector relative to X - - Returns - ------- - - self: The object NNPOM. 
- - """ - if self.epsilonInit < 0 or self.hiddenN < 1 or self.iterations < 1 or self.lambdaValue < 0: - return None - - - # Check that X and y have correct shape - X, y = check_X_y(X, y) - # Store the classes seen during fit - self.classes_ = unique_labels(y) - - # Aux variables - y = y[:,np.newaxis] - input_layer_size = X.shape[1] - num_labels = np.size(np.unique(y)) - m = X.shape[0] - - # Recode y to Y using nominal coding - Y = 1 * (np.tile(y, (1,num_labels)) == np.tile(np.arange(1,num_labels+1)[np.newaxis,:], (m,1))) - - # Hidden layer weigths (with bias) - initial_Theta1 = self.__randInitializeWeights(input_layer_size+1, self.getHiddenN()) - # Output layer weigths (without bias, the biases will be the thresholds) - initial_Theta2 = self.__randInitializeWeights(self.getHiddenN(), 1) - # Class thresholds parameters - initial_thresholds = self.__randInitializeWeights((num_labels-1),1) - - # Pack parameters - initial_nn_params = np.concatenate((initial_Theta1.flatten(order='F'), - initial_Theta2.flatten(order='F'), initial_thresholds.flatten(order='F')), - axis=0)[:,np.newaxis] - - results_optimization = scipy.optimize.fmin_l_bfgs_b(func=self.__nnPOMCostFunction, x0=initial_nn_params.ravel(),args=(input_layer_size, self.hiddenN, - num_labels, X, Y, self.lambdaValue), fprime=None, factr=1e3, maxiter=self.iterations,iprint=-1) - - self.nn_params = results_optimization[0] - - # Unpack the parameters - Theta1, Theta2, thresholds_param = self.__unpackParameters(self.nn_params, input_layer_size, - self.getHiddenN(), num_labels) - - self.theta1 = Theta1 - self.theta2 = Theta2 - self.thresholds = self.__convertThresholds(thresholds_param, num_labels) - self.num_labels = num_labels - self.m = m - - return self - - def predict (self, test): - - """ - - Predicts labels of TEST patterns labels. The object needs to be fitted to the data first. - - Parameters - ---------- - test: {array-like, sparse matrix}, shape (n_samples, n_features) - test patterns array, where n_samples is the number of samples - and n_features is the number of features + """ + + NNPOM Neural Network based on Proportional Odd Model (NNPOM). This + class implements a neural network model for ordinal regression. The + model has one hidden layer with hiddenN neurons and one outputlayer + with only one neuron but as many threshold as the number of classes + minus one. The standard POM model is applied in this neuron to have + probabilistic outputs. The learning is based on iRProp+ algorithm and + the implementation provided by Roberto Calandra in his toolbox Rprop + Toolbox for {MATLAB}: + http://www.ias.informatik.tu-darmstadt.de/Research/RpropToolbox + The model is adjusted by minimizing cross entropy. A regularization + parameter "lambda" is included based on L2, and the number of + iterations is specified by the "iterations" parameter. + + NNPOM public methods: + fit - Fits a model from training data + predict - Performs label prediction + + References: + [1] P. McCullagh, Regression models for ordinal data, Journal of + the Royal Statistical Society. Series B (Methodological), vol. 42, + no. 2, pp. 109–142, 1980. + [2] M. J. Mathieson, Ordinal models for neural networks, in Proc. + 3rd Int. Conf. Neural Netw. Capital Markets, 1996, pp. + 523-536. + [3] P.A. Gutiérrez, M. Pérez-Ortiz, J. Sánchez-Monedero, + F. Fernández-Navarro and C. Hervás-Martínez + Ordinal regression methods: survey and experimental study + IEEE Transactions on Knowledge and Data Engineering, Vol. 28. 
+ Issue 1, 2016 + http://dx.doi.org/10.1109/TKDE.2015.2457911 + + This file is part of ORCA: https://github.com/ayrna/orca + Original authors: Pedro Antonio Gutiérrez, María Pérez Ortiz, Javier Sánchez Monedero + Citation: If you use this code, please cite the associated paper http://www.uco.es/grupos/ayrna/orreview + Copyright: + This software is released under the The GNU General Public License v3.0 licence + available at http://www.gnu.org/licenses/gpl-3.0.html + + + NNPOM properties: + epsilonInit - Range for initializing the weights. + hiddenN - Number of hidden neurons of the + model. + iterations - Number of iterations for fmin_l_bfgs_b + algorithm. + lambdaValue - Regularization parameter. + theta1 - Hidden layer weigths (with bias) + theta2 - Output layer weigths (without bias, the biases will be the thresholds) + thresholds - Class thresholds parameters + num_labels - Number of labels in the problem + m - Number of samples of X (train patterns array). + + """ + + # Constructor of class NNPOM (set parameters values). + def __init__(self, epsilonInit=0.5, hiddenN=50, iterations=500, lambdaValue=0.01): + self.epsilonInit = epsilonInit + self.hiddenN = hiddenN + self.iterations = iterations + self.lambdaValue = lambdaValue + + # --------Main functions (Public Access)-------- + + def fit(self, X, y): + """ + + Trains the model for the model NNPOM method with TRAIN data. + Returns the projection of patterns (only valid for threshold models) and the predicted labels. + + Parameters + ---------- + + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features + + y: array-like, shape (n_samples) + Target vector relative to X + + Returns + ------- + + self: The object NNPOM. 
+ + """ + if ( + self.epsilonInit < 0 + or self.hiddenN < 1 + or self.iterations < 1 + or self.lambdaValue < 0 + ): + return None + + # Check that X and y have correct shape + X, y = check_X_y(X, y) + # Store the classes seen during fit + self.classes_ = unique_labels(y) + + # Aux variables + y = y[:, np.newaxis] + input_layer_size = X.shape[1] + num_labels = np.size(np.unique(y)) + m = X.shape[0] + + # Recode y to Y using nominal coding + Y = 1 * ( + np.tile(y, (1, num_labels)) + == np.tile(np.arange(1, num_labels + 1)[np.newaxis, :], (m, 1)) + ) + + # Hidden layer weigths (with bias) + initial_Theta1 = self.__randInitializeWeights( + input_layer_size + 1, self.getHiddenN() + ) + # Output layer weigths (without bias, the biases will be the thresholds) + initial_Theta2 = self.__randInitializeWeights(self.getHiddenN(), 1) + # Class thresholds parameters + initial_thresholds = self.__randInitializeWeights((num_labels - 1), 1) + + # Pack parameters + initial_nn_params = np.concatenate( + ( + initial_Theta1.flatten(order="F"), + initial_Theta2.flatten(order="F"), + initial_thresholds.flatten(order="F"), + ), + axis=0, + )[:, np.newaxis] + + results_optimization = scipy.optimize.fmin_l_bfgs_b( + func=self.__nnPOMCostFunction, + x0=initial_nn_params.ravel(), + args=(input_layer_size, self.hiddenN, num_labels, X, Y, self.lambdaValue), + fprime=None, + factr=1e3, + maxiter=self.iterations, + iprint=-1, + ) + + self.nn_params = results_optimization[0] + + # Unpack the parameters + Theta1, Theta2, thresholds_param = self.__unpackParameters( + self.nn_params, input_layer_size, self.getHiddenN(), num_labels + ) + + self.theta1 = Theta1 + self.theta2 = Theta2 + self.thresholds = self.__convertThresholds(thresholds_param, num_labels) + self.num_labels = num_labels + self.m = m + + return self + + def predict(self, test): + """ + + Predicts labels of TEST patterns labels. The object needs to be fitted to the data first. + + Parameters + ---------- + + test: {array-like, sparse matrix}, shape (n_samples, n_features) + test patterns array, where n_samples is the number of samples + and n_features is the number of features + + Returns + ------- + + predicted: {array-like, sparse matrix}, shape (n_samples,) + Vector array with predicted values for each pattern of test patterns. + + """ + + # Check is fit had been called + check_is_fitted(self) + + # Input validation + test = check_array(test) + + m = test.shape[0] + + a1 = np.append(np.ones((m, 1)), test, axis=1) + z2 = np.matmul(a1, self.theta1.T) + a2 = 1.0 / (1.0 + np.exp(-z2)) + projected = np.matmul(a2, self.theta2.T) + + z3 = np.tile(self.thresholds, (m, 1)) - np.tile( + projected, (1, self.num_labels - 1) + ) + a3T = 1.0 / (1.0 + np.exp(-z3)) + a3 = np.append(a3T, np.ones((m, 1)), axis=1) + a3[:, 1:] = a3[:, 1:] - a3[:, 0:-1] + predicted = a3.argmax(1) + 1 + + return predicted + + # --------Getters & Setters (Public Access)-------- + + # Getter & Setter of "epsilonInit" + def getEpsilonInit(self): + """ + + This method returns the value of the variable self.epsilonInit. + self.epsilonInit contains the value of epsilon, which is the initialization range of the weights. + + """ + + return self.epsilonInit + + def setEpsilonInit(self, epsilonInit): + """ + + This method modify the value of the variable self.epsilonInit. + This is replaced by the value contained in the epsilonInit variable passed as an argument. 
+ + """ + + self.epsilonInit = epsilonInit + + # Getter & Setter of "hiddenN" + def getHiddenN(self): + """ + + This method returns the value of the variable self.hiddenN. + self.hiddenN contains the number of nodes/neurons in the hidden layer. + + """ + + return self.hiddenN + + def setHiddenN(self, hiddenN): + """ + + This method modify the value of the variable self.hiddenN. + This is replaced by the value contained in the hiddenN variable passed as an argument. + + """ + + self.hiddenN = hiddenN - Returns - ------- + # Getter & Setter of "iterations" + def getIterations(self): + """ - predicted: {array-like, sparse matrix}, shape (n_samples,) - Vector array with predicted values for each pattern of test patterns. + This method returns the value of the variable self.iterations. + self.iterations contains the number of iterations. - """ + """ - # Check is fit had been called - check_is_fitted(self) - - # Input validation - test = check_array(test) - - m = test.shape[0] + return self.iterations - a1 = np.append(np.ones((m, 1)), test, axis=1) - z2 = np.matmul(a1,self.theta1.T) - a2 = 1.0 / (1.0 + np.exp(-z2)) - projected = np.matmul(a2,self.theta2.T) + def setIterations(self, iterations): + """ - z3 = np.tile(self.thresholds, (m,1)) - np.tile(projected, (1, self.num_labels-1)) - a3T = 1.0 / (1.0 + np.exp(-z3)) - a3 = np.append(a3T, np.ones((m,1)), axis=1) - a3[:,1:] = a3[:,1:] - a3[:,0:-1] - predicted = a3.argmax(1) + 1 + This method modify the value of the variable self.iterations. + This is replaced by the value contained in the iterations variable passed as an argument. - return predicted - - #--------Getters & Setters (Public Access)-------- - + """ - # Getter & Setter of "epsilonInit" - def getEpsilonInit (self): - - """ + self.iterations = iterations - This method returns the value of the variable self.epsilonInit. - self.epsilonInit contains the value of epsilon, which is the initialization range of the weights. + # Getter & Setter of "lambdaValue" + def getLambdaValue(self): + """ - """ + This method returns the value of the variable self.lambdaValue. + self.lambdaValue contains the Lambda parameter used in regularization. - return self.epsilonInit + """ - def setEpsilonInit (self, epsilonInit): + return self.lambdaValue - """ + def setLambdaValue(self, lambdaValue): + """ - This method modify the value of the variable self.epsilonInit. - This is replaced by the value contained in the epsilonInit variable passed as an argument. + This method modify the value of the variable self.lambdaValue. + This is replaced by the value contained in the lambdaValue variable passed as an argument. - """ + """ - self.epsilonInit = epsilonInit - + self.lambdaValue = lambdaValue - # Getter & Setter of "hiddenN" - def getHiddenN (self): + # Getter & Setter of "theta1" + def getTheta1(self): + """ - """ + This method returns the value of the variable self.theta1. + self.theta1 contains an array with the weights of the hidden layer (with biases included). - This method returns the value of the variable self.hiddenN. - self.hiddenN contains the number of nodes/neurons in the hidden layer. + """ - """ + return self.theta1 - return self.hiddenN + def setTheta1(self, theta1): + """ - def setHiddenN (self, hiddenN): - - """ + This method modify the value of the variable self.theta1. + This is replaced by the value contained in the theta1 variable passed as an argument. - This method modify the value of the variable self.hiddenN. 
- This is replaced by the value contained in the hiddenN variable passed as an argument. + """ - """ + self.theta1 = theta1 - self.hiddenN = hiddenN - + # Getter & Setter of "theta2" + def getTheta2(self): + """ - # Getter & Setter of "iterations" - def getIterations (self): - - """ + This method returns the value of the variable self.theta2. + self.theta2 contains an array with output layer weigths (without bias, the biases will be the thresholds) - This method returns the value of the variable self.iterations. - self.iterations contains the number of iterations. + """ - """ + return self.theta2 - return self.iterations - - def setIterations (self, iterations): + def setTheta2(self, theta2): + """ - """ + This method modify the value of the variable self.theta2. + This is replaced by the value contained in the theta2 variable passed as an argument. - This method modify the value of the variable self.iterations. - This is replaced by the value contained in the iterations variable passed as an argument. + """ - """ + self.theta2 = theta2 - self.iterations = iterations - + # Getter & Setter of "thresholds" + def getThresholds(self): + """ - # Getter & Setter of "lambdaValue" - def getLambdaValue (self): + This method returns the value of the variable self.thresholds. + self.thresholds contains an array with the class thresholds parameters. - """ + """ - This method returns the value of the variable self.lambdaValue. - self.lambdaValue contains the Lambda parameter used in regularization. + return self.thresholds - """ + def setThresholds(self, thresholds): + """ - return self.lambdaValue - - def setLambdaValue (self, lambdaValue): + This method modify the value of the variable self.thresholds. + This is replaced by the value contained in the thresholds variable passed as an argument. - """ + """ - This method modify the value of the variable self.lambdaValue. - This is replaced by the value contained in the lambdaValue variable passed as an argument. + self.thresholds = thresholds - """ + # Getter & Setter of "num_labels" + def getNum_labels(self): + """ - self.lambdaValue = lambdaValue + This method returns the value of the variable self.num_labels. + self.num_labels contains the number of labels in the problem. + """ - # Getter & Setter of "theta1" - def getTheta1 (self): - - """ + return self.num_labels - This method returns the value of the variable self.theta1. - self.theta1 contains an array with the weights of the hidden layer (with biases included). + def setNum_labels(self, num_labels): + """ - """ + This method modify the value of the variable self.num_labels. + This is replaced by the value contained in the num_labels variable passed as an argument. - return self.theta1 + """ - def setTheta1 (self, theta1): - - """ + self.num_labels = num_labels - This method modify the value of the variable self.theta1. - This is replaced by the value contained in the theta1 variable passed as an argument. + # Getter & Setter of "m" + def getM(self): + """ - """ + This method returns the value of the variable self.m. + self.m contains the number of samples of X (train patterns array). - self.theta1 = theta1 - + """ - # Getter & Setter of "theta2" - def getTheta2 (self): - - """ + return self.m - This method returns the value of the variable self.theta2. - self.theta2 contains an array with output layer weigths (without bias, the biases will be the thresholds) + def setM(self, m): + """ - """ + This method modify the value of the variable self.m. 
+ This is replaced by the value contained in the m variable passed as an argument. - return self.theta2 - - def setTheta2 (self, theta2): - - """ + """ - This method modify the value of the variable self.theta2. - This is replaced by the value contained in the theta2 variable passed as an argument. - - """ + self.m = m - self.theta2 = theta2 + # --------------Private Access functions------------------ + # Download and save the values ​​of Theta1, Theta2 and thresholds_param + # from the nn_params array to their corresponding array + def __unpackParameters( + self, nn_params, input_layer_size, hidden_layer_size, num_labels + ): + """ - # Getter & Setter of "thresholds" - def getThresholds (self): - - """ + This method gets Theta1, Theta2 and thresholds_param back from the whole array nn_params. - This method returns the value of the variable self.thresholds. - self.thresholds contains an array with the class thresholds parameters. - - """ + Parameters + ---------- - return self.thresholds - - def setThresholds (self, thresholds): - - """ + nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size + + hidden_layer_size + (num_labels-1)) + Array that is a column vector. It stores the values ​​of Theta1, + Theta2 and thresholds_param, all of them together in an array in this order. - This method modify the value of the variable self.thresholds. - This is replaced by the value contained in the thresholds variable passed as an argument. - - """ + input_layer_size: integer + Number of nodes in the input layer of the neural network model. - self.thresholds = thresholds + hidden_layer_size: integer + Number of nodes in the hidden layer of the neural network model. + num_labels: integer + Number of classes. - # Getter & Setter of "num_labels" - def getNum_labels (self): - - """ - This method returns the value of the variable self.num_labels. - self.num_labels contains the number of labels in the problem. - - """ + Returns + ------- - return self.num_labels - - def setNum_labels (self, num_labels): - - """ + Theta1: The weights between the input layer and the hidden layer (with biases included). - This method modify the value of the variable self.num_labels. - This is replaced by the value contained in the num_labels variable passed as an argument. - - """ + Theta2: The weights between the hidden layer and the output layer + (biases are not included as they are the thresholds). - self.num_labels = num_labels + thresholds_param: classification thresholds. + """ - # Getter & Setter of "m" - def getM (self): - - """ + nTheta1 = hidden_layer_size * (input_layer_size + 1) + Theta1 = np.reshape( + nn_params[0:nTheta1], (hidden_layer_size, (input_layer_size + 1)), order="F" + ) - This method returns the value of the variable self.m. - self.m contains the number of samples of X (train patterns array). - - """ + nTheta2 = hidden_layer_size + Theta2 = np.reshape( + nn_params[nTheta1 : (nTheta1 + nTheta2)], (1, hidden_layer_size), order="F" + ) - return self.m - - def setM (self, m): - - """ + thresholds_param = np.reshape( + nn_params[(nTheta1 + nTheta2) :], ((num_labels - 1), 1), order="F" + ) - This method modify the value of the variable self.m. - This is replaced by the value contained in the m variable passed as an argument. 
- - """ + return Theta1, Theta2, thresholds_param - self.m = m + # Randomly initialize the weights of the neural network layer + # by entering the number of input and output nodes of that layer + def __randInitializeWeights(self, L_in, L_out): + """ - #--------------Private Access functions------------------ + This method randomly initializes the weights of a layer + with L_in incoming connections and L_out outgoing connections + Parameters + ---------- - # Download and save the values ​​of Theta1, Theta2 and thresholds_param - # from the nn_params array to their corresponding array - def __unpackParameters(self, nn_params, input_layer_size, hidden_layer_size, num_labels): - - """ + L_in: integer + Number of inputs of the layer. - This method gets Theta1, Theta2 and thresholds_param back from the whole array nn_params. + L_out: integer + Number of outputs of the layer. - Parameters - ---------- + Returns + ------- - nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size - + hidden_layer_size + (num_labels-1)) - Array that is a column vector. It stores the values ​​of Theta1, - Theta2 and thresholds_param, all of them together in an array in this order. + W: Array with the weights of each synaptic relationship between nodes. - input_layer_size: integer - Number of nodes in the input layer of the neural network model. - - hidden_layer_size: integer - Number of nodes in the hidden layer of the neural network model. - - num_labels: integer - Number of classes. + """ + W = ( + np.random.rand(L_out, L_in) * 2 * self.getEpsilonInit() + - self.getEpsilonInit() + ) - Returns - ------- + return W - Theta1: The weights between the input layer and the hidden layer (with biases included). + # Calculate the thresholds + def __convertThresholds(self, thresholds_param, num_labels): + """ - Theta2: The weights between the hidden layer and the output layer - (biases are not included as they are the thresholds). + This method transforms thresholds to perform unconstrained optimization. - thresholds_param: classification thresholds. - - """ + thresholds(1) = thresholds_param(1) + thresholds(2) = thresholds_param(1) + thresholds_param(2)^2 + thresholds(3) = thresholds_param(1) + thresholds_param(2)^2 + + thresholds_param(3)^2 - nTheta1 = hidden_layer_size * (input_layer_size + 1) - Theta1 = np.reshape(nn_params[0:nTheta1],(hidden_layer_size, - (input_layer_size + 1)),order='F') - - nTheta2 = hidden_layer_size - Theta2 = np.reshape(nn_params[nTheta1:(nTheta1+nTheta2)], - (1, hidden_layer_size),order='F') - - thresholds_param = np.reshape(nn_params[(nTheta1+nTheta2):], - ((num_labels-1), 1),order = 'F') - - return Theta1, Theta2, thresholds_param - - - # Randomly initialize the weights of the neural network layer - # by entering the number of input and output nodes of that layer - def __randInitializeWeights(self, L_in, L_out): - - """ - - This method randomly initializes the weights of a layer - with L_in incoming connections and L_out outgoing connections - - Parameters - ---------- + Parameters + ---------- - L_in: integer - Number of inputs of the layer. + thresholds_param: {array-like, column vector}, shape (num_labels-1, 1) + Contains the original value of the thresholds between classes - L_out: integer - Number of outputs of the layer. - - Returns - ------- + num_labels: integer + Number of classes. - W: Array with the weights of each synaptic relationship between nodes. 
- - """ + Returns + ------- - W = np.random.rand(L_out,L_in)*2*self.getEpsilonInit() - self.getEpsilonInit() - - return W + thresholds: thresholds of the line + """ - # Calculate the thresholds - def __convertThresholds(self, thresholds_param, num_labels): - - """ + # Threshold ^2 element by element + thresholds_pquad = thresholds_param**2 - This method transforms thresholds to perform unconstrained optimization. + # Gets row-array containing the thresholds + thresholds = np.reshape( + np.multiply( + np.tile( + np.concatenate( + (thresholds_param[0:1], thresholds_pquad[1:]), axis=0 + ), + (1, num_labels - 1), + ).T, + np.tril(np.ones((num_labels - 1, num_labels - 1))), + ).sum(axis=1), + (num_labels - 1, 1), + ).T - thresholds(1) = thresholds_param(1) - thresholds(2) = thresholds_param(1) + thresholds_param(2)^2 - thresholds(3) = thresholds_param(1) + thresholds_param(2)^2 - + thresholds_param(3)^2 + return thresholds - Parameters - ---------- + # Implements the cost function and obtains the corresponding derivatives. + def __nnPOMCostFunction( + self, + nn_params, + input_layer_size, + hidden_layer_size, + num_labels, + X, + Y, + lambdaValue, + ): + """ + This method implements the cost function and obtains + the corresponding derivatives. - thresholds_param: {array-like, column vector}, shape (num_labels-1, 1) - Contains the original value of the thresholds between classes - - num_labels: integer - Number of classes. - - Returns - ------- - - thresholds: thresholds of the line - - """ - - # Threshold ^2 element by element - thresholds_pquad=thresholds_param**2 - - # Gets row-array containing the thresholds - thresholds = np.reshape(np.multiply(np.tile(np.concatenate((thresholds_param[0:1], - thresholds_pquad[1:]), axis=0), (1, num_labels-1)).T, np.tril(np.ones((num_labels-1, - num_labels-1)))).sum(axis=1), (num_labels-1,1)).T - - return thresholds - - - # Implements the cost function and obtains the corresponding derivatives. - def __nnPOMCostFunction(self, nn_params, input_layer_size, hidden_layer_size, - num_labels, X, Y, lambdaValue): - - """ - This method implements the cost function and obtains - the corresponding derivatives. - - Parameters - ---------- - - nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size - + hidden_layer_size + (num_labels-1)) - - Array that is a column vector. It stores the values ​​of Theta1, - Theta2 and thresholds_param, all of them together in an array in this order. - - input_layer_size: integer - Number of nodes in the input layer of the neural network model. - - hidden_layer_size: integer - Number of nodes in the hidden layer of the neural network model. - - num_labels: integer - Number of classes. - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - Training patterns array, where n_samples is the number of samples - and n_features is the number of features - - Y: array-like, shape (n_samples) - Target vector relative to X - - lambdaValue: - Regularization parameter. - - Returns - ------- - - J: Matrix with cost function (updated weight matrix). - grad: Array with the error gradient of each weight of each layer. 
- - """ - - # Unroll all the parameters - nn_params = nn_params.reshape((nn_params.shape[0],1)) - - Theta1,Theta2,thresholds_param = self.__unpackParameters(nn_params, - input_layer_size, hidden_layer_size, num_labels) - - # Convert thresholds - thresholds = self.__convertThresholds(thresholds_param, num_labels) - - # Setup some useful variables - m = np.size(X, 0) - - # Neural Network model - a1 = np.append(np.ones((m, 1)), X, axis=1) - z2 = np.matmul(a1,Theta1.T) - a2 = 1.0 / (1.0 + np.exp(-z2)) - - z3 = np.tile(thresholds,(m,1)) - np.tile(np.matmul(a2,Theta2.T),(1, num_labels-1)) - a3T = 1.0 / (1.0 + np.exp(-z3)) - a3 = np.append(a3T, np.ones((m,1)), axis=1) - h = np.concatenate((a3[:,0].reshape((a3.shape[0],1)),a3[:,1:] - a3[:,0:-1]), axis = 1) - - # Final output - out = h - - # Calculate penalty (regularización L2) - p = np.sum((Theta1[:,1:]**2).sum() + (Theta2[:,0:]**2).sum()) - - # Cross entropy - J = np.sum(-np.log(out[np.where(Y==1)]), axis=0)/m + lambdaValue*p/(2*m) - - # Cross entropy - errorDer = np.zeros(Y.shape) - errorDer[np.where(Y!=0)] = np.divide(-Y[np.where(Y!=0)],out[np.where(Y!=0)]) - - # Calculate sigmas - fGradients = np.multiply(a3T,(1-a3T)) - gGradients = np.multiply(errorDer, np.concatenate((fGradients[:,0].reshape(-1,1), - (fGradients[:,1:] - fGradients[:,:-1]), -fGradients[:,-1].reshape(-1,1)), axis=1)) - sigma3 = -np.sum(gGradients,axis=1)[:,np.newaxis] - sigma2 = np.multiply(np.multiply(np.matmul(sigma3, Theta2), a2), (1-a2)) - - # Accumulate gradients - delta_1 = np.matmul(sigma2.T, a1) - delta_2 = np.matmul(sigma3.T, a2) - - # Calculate regularized gradient - p1 = (lambdaValue/m) * np.concatenate((np.zeros((np.size(Theta1, axis=0), 1)), Theta1[:,1:]), axis=1) - p2 = (lambdaValue/m) * Theta2[:,0:] - Theta1_grad = delta_1 / m + p1 - Theta2_grad = delta_2 / m + p2 - - # Treshold gradients - ThreshGradMatrix = np.multiply(np.concatenate((np.triu(np.ones((num_labels-1, num_labels-1))), - np.ones((num_labels-1, 1))), axis=1), np.tile(gGradients.sum(axis=0), (num_labels-1, 1))) - - originalShape = ThreshGradMatrix.shape - ThreshGradMatrix = ThreshGradMatrix.flatten(order='F') - - ThreshGradMatrix[(num_labels)::num_labels] = ThreshGradMatrix.flatten(order='F')[(num_labels)::num_labels] + np.multiply(errorDer[:,1:(num_labels-1)], - fGradients[:,0:(num_labels-2)]).sum(axis=0) - - ThreshGradMatrix = np.reshape(ThreshGradMatrix[:,np.newaxis],originalShape, order ='F') - - Threshold_grad = ThreshGradMatrix.sum(axis=1)[:,np.newaxis]/m - Threshold_grad[1:] = 2 * np.multiply(Threshold_grad[1:], thresholds_param[1:]) - - # Unroll gradients - grad = np.concatenate((Theta1_grad.flatten(order='F'), - Theta2_grad.flatten(order='F'), Threshold_grad.flatten(order='F')), - axis=0) - - return J,grad - + Parameters + ---------- + + nn_params: column array, shape ((imput_layer_size+1)*hidden_layer_size + + hidden_layer_size + (num_labels-1)) + + Array that is a column vector. It stores the values ​​of Theta1, + Theta2 and thresholds_param, all of them together in an array in this order. + + input_layer_size: integer + Number of nodes in the input layer of the neural network model. + + hidden_layer_size: integer + Number of nodes in the hidden layer of the neural network model. + + num_labels: integer + Number of classes. 
+ + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features + + Y: array-like, shape (n_samples) + Target vector relative to X + + lambdaValue: + Regularization parameter. + + Returns + ------- + + J: Matrix with cost function (updated weight matrix). + grad: Array with the error gradient of each weight of each layer. + + """ + + # Unroll all the parameters + nn_params = nn_params.reshape((nn_params.shape[0], 1)) + + Theta1, Theta2, thresholds_param = self.__unpackParameters( + nn_params, input_layer_size, hidden_layer_size, num_labels + ) + + # Convert thresholds + thresholds = self.__convertThresholds(thresholds_param, num_labels) + + # Setup some useful variables + m = np.size(X, 0) + + # Neural Network model + a1 = np.append(np.ones((m, 1)), X, axis=1) + z2 = np.matmul(a1, Theta1.T) + a2 = 1.0 / (1.0 + np.exp(-z2)) + + z3 = np.tile(thresholds, (m, 1)) - np.tile( + np.matmul(a2, Theta2.T), (1, num_labels - 1) + ) + a3T = 1.0 / (1.0 + np.exp(-z3)) + a3 = np.append(a3T, np.ones((m, 1)), axis=1) + h = np.concatenate( + (a3[:, 0].reshape((a3.shape[0], 1)), a3[:, 1:] - a3[:, 0:-1]), axis=1 + ) + + # Final output + out = h + + # Calculate penalty (regularización L2) + p = np.sum((Theta1[:, 1:] ** 2).sum() + (Theta2[:, 0:] ** 2).sum()) + + # Cross entropy + J = np.sum(-np.log(out[np.where(Y == 1)]), axis=0) / m + lambdaValue * p / ( + 2 * m + ) + + # Cross entropy + errorDer = np.zeros(Y.shape) + errorDer[np.where(Y != 0)] = np.divide( + -Y[np.where(Y != 0)], out[np.where(Y != 0)] + ) + + # Calculate sigmas + fGradients = np.multiply(a3T, (1 - a3T)) + gGradients = np.multiply( + errorDer, + np.concatenate( + ( + fGradients[:, 0].reshape(-1, 1), + (fGradients[:, 1:] - fGradients[:, :-1]), + -fGradients[:, -1].reshape(-1, 1), + ), + axis=1, + ), + ) + sigma3 = -np.sum(gGradients, axis=1)[:, np.newaxis] + sigma2 = np.multiply(np.multiply(np.matmul(sigma3, Theta2), a2), (1 - a2)) + + # Accumulate gradients + delta_1 = np.matmul(sigma2.T, a1) + delta_2 = np.matmul(sigma3.T, a2) + + # Calculate regularized gradient + p1 = (lambdaValue / m) * np.concatenate( + (np.zeros((np.size(Theta1, axis=0), 1)), Theta1[:, 1:]), axis=1 + ) + p2 = (lambdaValue / m) * Theta2[:, 0:] + Theta1_grad = delta_1 / m + p1 + Theta2_grad = delta_2 / m + p2 + + # Treshold gradients + ThreshGradMatrix = np.multiply( + np.concatenate( + ( + np.triu(np.ones((num_labels - 1, num_labels - 1))), + np.ones((num_labels - 1, 1)), + ), + axis=1, + ), + np.tile(gGradients.sum(axis=0), (num_labels - 1, 1)), + ) + + originalShape = ThreshGradMatrix.shape + ThreshGradMatrix = ThreshGradMatrix.flatten(order="F") + + ThreshGradMatrix[(num_labels)::num_labels] = ThreshGradMatrix.flatten( + order="F" + )[(num_labels)::num_labels] + np.multiply( + errorDer[:, 1 : (num_labels - 1)], fGradients[:, 0 : (num_labels - 2)] + ).sum( + axis=0 + ) + + ThreshGradMatrix = np.reshape( + ThreshGradMatrix[:, np.newaxis], originalShape, order="F" + ) + + Threshold_grad = ThreshGradMatrix.sum(axis=1)[:, np.newaxis] / m + Threshold_grad[1:] = 2 * np.multiply(Threshold_grad[1:], thresholds_param[1:]) + + # Unroll gradients + grad = np.concatenate( + ( + Theta1_grad.flatten(order="F"), + Theta2_grad.flatten(order="F"), + Threshold_grad.flatten(order="F"), + ), + axis=0, + ) + + return J, grad diff --git a/orca_python/classifiers/OrdinalDecomposition.py b/orca_python/classifiers/OrdinalDecomposition.py index e43f1dd..da41526 100644 --- 
a/orca_python/classifiers/OrdinalDecomposition.py +++ b/orca_python/classifiers/OrdinalDecomposition.py @@ -1,522 +1,498 @@ - import numpy as np -from sklearn.metrics import make_scorer from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.validation import check_X_y, check_array, check_is_fitted +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y + +from orca_python.utilities import load_classifier # from sys import path # path.append('..') -from orca_python.utilities import load_classifier - class OrdinalDecomposition(BaseEstimator, ClassifierMixin): - """ - OrdinalDecomposition ensemble classifier - - This class implements an ensemble model where an ordinal problem - is decomposed into several binary subproblems, each one of which - will generate a different (binary) model, though all will share - same base classifier and parameters for it. - - There are 4 different ways to decompose the original problem based - on how the coding matrix is built. - - - Parameters - ---------- - - dtype: string - Type of decomposition to be performed by classifier. May be - one of 4 different types: 'ordered_partitions', 'one_vs_next', - 'one_vs_followers' or 'one_vs_previous' - - The coding matrix generated by each method, for a problem with - 5 classes will be as follows: - - ordered_partitions one_vs_next one_vs_followers one_vs_previous - - -, -, -, -; -, , , ; -, , , ; +, +, +, +; - +, -, -, -; +, -, , ; +, -, , ; +, +, +, -; - +, +, -, -; , +, -, ; +, +, -, ; +, +, -, ; - +, +, +, -; , , +, -; +, +, +, -; +, -, , ; - +, +, +, +; , , , +; +, +, +, +; -, , , ; - - where rows represent classes and columns represent base - classifiers. plus signs indicate that for that classifier, - the label will be part of the positive class, on the other - hand, a minus sign places that class into the negative one - for that binary problem. If there is no sign, then those - samples will not be used when building the model. - - decision_method: string - Decision method that transforms the predictions of the n - different base classifiers to produce the final label (one - among the real ordinal classes). - - base_classifier: string - Base classifier used to build a model for each binary - subproblem. The base classifier need to be a classifier of - orca-python framework or any classifier available in sklearn. - Other classifiers implemented in sklearn's API can be used here - - parameters: dict - This dictionary will store the parameters used to build the - base classifier. Only one value per parameter is allowed. - - Attributes - ---------- - - classes_: list - List that contains all different class labels found in the - original dataset. - - coding_matrix_: array-like, shape (n_targets, n_targets-1) - Matrix that defines which classes will be used to build the - model of each subproblem, and in which binary class they - belong inside those new models. Further explained previously. - - classifiers_: list of classifiers - Initialy empty, will include all fitted models for each - subproblem once the fit function for this class is called - successfully. - - - References - ---------- - P.A. Gutierrez, M. Perez-Ortiz, J. Sanchez-Monedero, - F. Fernandez-Navarro and C. Hervas-Martinez (2016), - "Ordinal regression methods: survey and experimental study", - IEEE Transactions on Knowledge and Data Engineering. Vol. 28. 
Issue 1 - http://dx.doi.org/10.1109/TKDE.2015.2457911 - """ - - def __init__(self, dtype="ordered_partitions", decision_method="frank_hall", - base_classifier="sklearn.linear_model.LogisticRegression", parameters={}): - - self.dtype = dtype - self.decision_method = decision_method - self.base_classifier = base_classifier - self.parameters = parameters - - - def fit(self, X, y): - - """ - Fit the model with the training data - - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - Training patterns array, where n_samples is the number of - samples and n_features is the number of features. - - y: array-like, shape (n_samples) - Target vector relative to X. - - Returns - ------- - - self: object - """ - - X, y = check_X_y(X, y) - - self.X_ = X - self.y_ = y - - # Get list of different labels of the dataset - self.classes_ = np.unique(y) - - # Give each train input its corresponding output label - # for each binary classifier - self.coding_matrix_ = self._coding_matrix(self.dtype.lower(), len(self.classes_)) - class_labels = self.coding_matrix_[(np.digitize(y, self.classes_) - 1), :] - - - self.classifiers_ = [] - # Fitting n_targets - 1 classifiers - for n in range(len(class_labels[0,:])): - - estimator = load_classifier(self.base_classifier, self.parameters) - estimator.fit(X[np.where(class_labels[:,n] != 0)], - np.ravel(class_labels[np.where(class_labels[:,n] != 0), n].T)) - - self.classifiers_.append(estimator) - - - return self - - - - def predict(self, X): - - """ - Performs classification on samples in X. - - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - - Returns - ------- - - predicted_y: array, shape (n_samples,) - Class labels for samples in X. - """ - - - check_is_fitted(self, ['X_', 'y_']) - X = check_array(X) - - # Getting predicted labels for dataset from each classifier - predictions = self._get_predictions(X) - - - decision_method = self.decision_method.lower() - if decision_method == "exponential_loss": - - # Scaling predictions from [0,1] range to [-1,1] - predictions = (predictions*2 - 1) - - # Transforming from binary problems to the original problem - losses = self._exponential_loss(predictions) - predicted_y = self.classes_[np.argmin(losses, axis=1)] - - - elif decision_method == "hinge_loss": - - # Scaling predictions from [0,1] range to [-1,1] - predictions = (predictions*2 - 1) - - # Transforming from binary problems to the original problem - losses = self._hinge_loss(predictions) - predicted_y = self.classes_[np.argmin(losses, axis=1)] - - - elif decision_method == "logarithmic_loss": - - # Scaling predictions from [0,1] range to [-1,1] - predictions = (predictions*2 - 1) - - # Transforming from binary problems to the original problem - losses = self._logarithmic_loss(predictions) - predicted_y = self.classes_[np.argmin(losses, axis=1)] - - - elif decision_method == "frank_hall": - - # Transforming from binary problems to the original problem - predicted_proba_y = self._frank_hall_method(predictions) - predicted_y = self.classes_[np.argmax(predicted_proba_y, axis=1)] - - - else: - raise AttributeError('The specified loss method "%s" is not implemented' - % decision_method) - - - return predicted_y - - - def predict_proba(self, X): - - """ - The returned estimates for all classes are ordered by the label of classes. 
- - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - - Returns - ------- - - predicted_proba_y: array, shape (n_samples,) - Returns the probability of the sample for each class in the model, where classes are ordered as they are in self.classes_. - """ - - - check_is_fitted(self, ['X_', 'y_']) - X = check_array(X) - - # Getting predicted labels for dataset from each classifier - predictions = self._get_predictions(X) - - - decision_method = self.decision_method.lower() - if decision_method == "exponential_loss": - - # Scaling predictions from [0,1] range to [-1,1] - predictions = (predictions*2 - 1) - - # Transforming from binary problems to the original problem - losses = self._exponential_loss(predictions) - losses = 1 / losses.astype(float) - predicted_proba_y = [] - for losse in losses: - predicted_proba_y.append((np.exp(losse) / np.sum(np.exp(losse)))) - predicted_proba_y = np.array(predicted_proba_y) - - - elif decision_method == "hinge_loss": - - # Scaling predictions from [0,1] range to [-1,1] - predictions = (predictions*2 - 1) - - # Transforming from binary problems to the original problem - losses = self._hinge_loss(predictions) - losses = 1 / losses.astype(float) - predicted_proba_y = [] - for losse in losses: - predicted_proba_y.append((np.exp(losse) / np.sum(np.exp(losse)))) - predicted_proba_y = np.array(predicted_proba_y) - - - elif decision_method == "logarithmic_loss": - - # Scaling predictions from [0,1] range to [-1,1] - predictions = (predictions*2 - 1) - - # Transforming from binary problems to the original problem - losses = self._logarithmic_loss(predictions) - losses = 1 / losses.astype(float) - predicted_proba_y = [] - for losse in losses: - predicted_proba_y.append((np.exp(losse) / np.sum(np.exp(losse)))) - predicted_proba_y = np.array(predicted_proba_y) - - - elif decision_method == "frank_hall": - - # Transforming from binary problems to the original problem - predicted_proba_y = self._frank_hall_method(predictions) - - else: - raise AttributeError('The specified loss method "%s" is not implemented' - % decision_method) - - - return predicted_proba_y - - def _coding_matrix(self, dtype, n_classes): - - """ - Method that returns the coding matrix for a given dataset. - - Parameters - ---------- - - dtype: string - Type of decomposition to be performed by classifier. - - n_classes: int - Number of different classes in actual dataset - - Returns - ------- - - coding_matrix: array-like, shape (n_targets, n_targets-1) - Each value must be in range {-1, 1, 0}, whether that class - will belong to negative class, positive class or will not - be used for that particular binary classifier. 
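As a concrete check, the 'ordered_partitions' matrix for the five-class example in the docstring can be generated with the same two lines _coding_matrix uses:

import numpy as np

n_classes = 5   # matches the five-class example above

coding_matrix = np.triu(-2 * np.ones(n_classes - 1)) + 1
coding_matrix = np.vstack([coding_matrix, np.ones((1, n_classes - 1))]).astype(int)

print(coding_matrix)
# [[-1 -1 -1 -1]
#  [ 1 -1 -1 -1]
#  [ 1  1 -1 -1]
#  [ 1  1  1 -1]
#  [ 1  1  1  1]]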
- """ - - if dtype == "ordered_partitions": - - coding_matrix = np.triu((-2 * np.ones(n_classes - 1))) + 1 - coding_matrix = np.vstack([coding_matrix, np.ones((1, n_classes-1))]) - - elif dtype == "one_vs_next": - - plus_ones = np.diagflat(np.ones((1, n_classes - 1), dtype=int), -1) - minus_ones = -(np.eye(n_classes, n_classes - 1, dtype=int)) - coding_matrix = minus_ones + plus_ones[:,:-1] - - elif dtype == "one_vs_followers": - - minus_ones = np.diagflat(-np.ones((1, n_classes), dtype=int)) - plus_ones = np.tril(np.ones(n_classes), -1) - coding_matrix = (plus_ones + minus_ones)[:,:-1] - - elif dtype == "one_vs_previous": - - plusones = np.triu(np.ones(n_classes)) - minusones = -np.diagflat(np.ones((1, n_classes - 1)), -1) - coding_matrix = np.flip((plusones + minusones)[:,:-1], axis=1) - - else: - - raise ValueError("Decomposition type %s does not exist" % dtype) - - return coding_matrix.astype(int) - - - - def _get_predictions(self, X): - - """ - For each pattern inside the dataset X, this method returns - the probability for that pattern to belong to the positive - class. There will be as many predictions (columns) as different - binary classifiers have been fitted previously. - - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - - Returns - ------- - - predictions: array, shape (n_samples, n_targets-1) - """ - - predictions = np.array(list(map(lambda c: c.predict_proba(X)[:,1], self.classifiers_))).T - - return predictions - - - - def _exponential_loss(self, predictions): - - """ - Computation of the exponential losses for each label of the - original ordinal multinomial problem. Transforms from n-1 - binary subproblems to the original ordinal problem with - n targets. - - Parameters - ---------- - - predictions: array, shape (n_samples, n_targets-1) - - Returns - ------- - - e_losses: array, shape (n_samples, n_targets) - Exponential losses for each sample of dataset X. One - different value for each class label. - """ - - - # Computing exponential losses - e_losses = np.zeros((predictions.shape[0], (predictions.shape[1] + 1))) - for i in range(predictions.shape[1] + 1): - - e_losses[:,i] = np.sum(np.exp(-predictions * np.tile(self.coding_matrix_[i,:], - (predictions.shape[0], 1))), axis=1) - - return e_losses - - - - def _hinge_loss(self, predictions): - - """ - Computation of the Hinge losses for each label of the - original ordinal multinomial problem. Transforms from n-1 - binary subproblems to the original ordinal problem with - n targets. + """ + OrdinalDecomposition ensemble classifier + + This class implements an ensemble model where an ordinal problem + is decomposed into several binary subproblems, each one of which + will generate a different (binary) model, though all will share + same base classifier and parameters for it. + + There are 4 different ways to decompose the original problem based + on how the coding matrix is built. + + + Parameters + ---------- + + dtype: string + Type of decomposition to be performed by classifier. 
May be + one of 4 different types: 'ordered_partitions', 'one_vs_next', + 'one_vs_followers' or 'one_vs_previous' - Parameters - ---------- + The coding matrix generated by each method, for a problem with + 5 classes will be as follows: + + ordered_partitions one_vs_next one_vs_followers one_vs_previous + + -, -, -, -; -, , , ; -, , , ; +, +, +, +; + +, -, -, -; +, -, , ; +, -, , ; +, +, +, -; + +, +, -, -; , +, -, ; +, +, -, ; +, +, -, ; + +, +, +, -; , , +, -; +, +, +, -; +, -, , ; + +, +, +, +; , , , +; +, +, +, +; -, , , ; + + where rows represent classes and columns represent base + classifiers. plus signs indicate that for that classifier, + the label will be part of the positive class, on the other + hand, a minus sign places that class into the negative one + for that binary problem. If there is no sign, then those + samples will not be used when building the model. + + decision_method: string + Decision method that transforms the predictions of the n + different base classifiers to produce the final label (one + among the real ordinal classes). + + base_classifier: string + Base classifier used to build a model for each binary + subproblem. The base classifier need to be a classifier of + orca-python framework or any classifier available in sklearn. + Other classifiers implemented in sklearn's API can be used here - predictions: array, shape (n_samples, n_targets-1) + parameters: dict + This dictionary will store the parameters used to build the + base classifier. Only one value per parameter is allowed. - Returns - ------- + Attributes + ---------- - hLosses: array, shape (n_samples, n_targets) - Hinge losses for each sample of dataset X. One - different value for each class label. + classes_: list + List that contains all different class labels found in the + original dataset. - """ + coding_matrix_: array-like, shape (n_targets, n_targets-1) + Matrix that defines which classes will be used to build the + model of each subproblem, and in which binary class they + belong inside those new models. Further explained previously. - # Computing Hinge losses - h_losses = np.zeros((predictions.shape[0], (predictions.shape[1] + 1))) - for i in range(predictions.shape[1] + 1): + classifiers_: list of classifiers + Initialy empty, will include all fitted models for each + subproblem once the fit function for this class is called + successfully. - h_losses[:,i] = np.sum(np.maximum(0, (1 - np.tile(self.coding_matrix_[i,:], - (predictions.shape[0], 1)) - * predictions)), axis=1) - return h_losses + References + ---------- + P.A. Gutierrez, M. Perez-Ortiz, J. Sanchez-Monedero, + F. Fernandez-Navarro and C. Hervas-Martinez (2016), + "Ordinal regression methods: survey and experimental study", + IEEE Transactions on Knowledge and Data Engineering. Vol. 28. Issue 1 + http://dx.doi.org/10.1109/TKDE.2015.2457911 + """ + def __init__( + self, + dtype="ordered_partitions", + decision_method="frank_hall", + base_classifier="sklearn.linear_model.LogisticRegression", + parameters={}, + ): + self.dtype = dtype + self.decision_method = decision_method + self.base_classifier = base_classifier + self.parameters = parameters + def fit(self, X, y): + """ + Fit the model with the training data + + Parameters + ---------- - def _logarithmic_loss(self, predictions): + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of + samples and n_features is the number of features. 
- """ - Computation of the logarithmic losses for each label of the - original ordinal multinomial problem. Transforms from n-1 - binary subproblems to the original ordinal problem with - n targets. + y: array-like, shape (n_samples) + Target vector relative to X. - Parameters - ---------- + Returns + ------- - predictions: array, shape (n_samples, n_targets-1) + self: object + """ - Returns - ------- + X, y = check_X_y(X, y) + + self.X_ = X + self.y_ = y + + # Get list of different labels of the dataset + self.classes_ = np.unique(y) - eLosses: array, shape (n_samples, n_targets) - logarithmic losses for each sample of dataset X. One - different value for each class label. + # Give each train input its corresponding output label + # for each binary classifier + self.coding_matrix_ = self._coding_matrix( + self.dtype.lower(), len(self.classes_) + ) + class_labels = self.coding_matrix_[(np.digitize(y, self.classes_) - 1), :] + + self.classifiers_ = [] + # Fitting n_targets - 1 classifiers + for n in range(len(class_labels[0, :])): + estimator = load_classifier(self.base_classifier, self.parameters) + estimator.fit( + X[np.where(class_labels[:, n] != 0)], + np.ravel(class_labels[np.where(class_labels[:, n] != 0), n].T), + ) + + self.classifiers_.append(estimator) - """ + return self + + def predict(self, X): + """ + Performs classification on samples in X. + + Parameters + ---------- + + X: {array-like, sparse matrix}, shape (n_samples, n_features) + + Returns + ------- + + predicted_y: array, shape (n_samples,) + Class labels for samples in X. + """ + + check_is_fitted(self, ["X_", "y_"]) + X = check_array(X) + + # Getting predicted labels for dataset from each classifier + predictions = self._get_predictions(X) + + decision_method = self.decision_method.lower() + if decision_method == "exponential_loss": + # Scaling predictions from [0,1] range to [-1,1] + predictions = predictions * 2 - 1 + + # Transforming from binary problems to the original problem + losses = self._exponential_loss(predictions) + predicted_y = self.classes_[np.argmin(losses, axis=1)] + + elif decision_method == "hinge_loss": + # Scaling predictions from [0,1] range to [-1,1] + predictions = predictions * 2 - 1 + + # Transforming from binary problems to the original problem + losses = self._hinge_loss(predictions) + predicted_y = self.classes_[np.argmin(losses, axis=1)] + elif decision_method == "logarithmic_loss": + # Scaling predictions from [0,1] range to [-1,1] + predictions = predictions * 2 - 1 - # Computing logarithmic losses - l_losses = np.zeros((predictions.shape[0], (predictions.shape[1] + 1))) - for i in range(predictions.shape[1] + 1): + # Transforming from binary problems to the original problem + losses = self._logarithmic_loss(predictions) + predicted_y = self.classes_[np.argmin(losses, axis=1)] - l_losses[:,i] = np.sum(np.log(1 + np.exp(-2 * np.tile(self.coding_matrix_[i,:], - (predictions.shape[0], 1)) - * predictions)), axis=1) + elif decision_method == "frank_hall": + # Transforming from binary problems to the original problem + predicted_proba_y = self._frank_hall_method(predictions) + predicted_y = self.classes_[np.argmax(predicted_proba_y, axis=1)] - return l_losses + else: + raise AttributeError( + 'The specified loss method "%s" is not implemented' % decision_method + ) + return predicted_y + def predict_proba(self, X): + """ + The returned estimates for all classes are ordered by the label of classes. 
- def _frank_hall_method(self, predictions): + Parameters + ---------- - """ - Returns the probability for each pattern of dataset to - belong to each one of the original targets. Transforms from n-1 - subproblems to the original ordinal problem with n targets. + X: {array-like, sparse matrix}, shape (n_samples, n_features) - Parameters - ---------- + Returns + ------- - predictions: array, shape (n_samples, n_targets-1) + predicted_proba_y: array, shape (n_samples,) + Returns the probability of the sample for each class in the model, where classes are ordered as they are in self.classes_. + """ - Returns - ------- + check_is_fitted(self, ["X_", "y_"]) + X = check_array(X) + + # Getting predicted labels for dataset from each classifier + predictions = self._get_predictions(X) + + decision_method = self.decision_method.lower() + if decision_method == "exponential_loss": + # Scaling predictions from [0,1] range to [-1,1] + predictions = predictions * 2 - 1 - predicted_proba_y: array, shape (n_samples, n_targets) - Class labels predicted for samples in dataset X. - """ + # Transforming from binary problems to the original problem + losses = self._exponential_loss(predictions) + losses = 1 / losses.astype(float) + predicted_proba_y = [] + for losse in losses: + predicted_proba_y.append((np.exp(losse) / np.sum(np.exp(losse)))) + predicted_proba_y = np.array(predicted_proba_y) + elif decision_method == "hinge_loss": + # Scaling predictions from [0,1] range to [-1,1] + predictions = predictions * 2 - 1 - if self.dtype.lower() != "ordered_partitions": - raise AttributeError("When using Frank and Hall decision method,\ - ordered_partitions must be used") + # Transforming from binary problems to the original problem + losses = self._hinge_loss(predictions) + losses = 1 / losses.astype(float) + predicted_proba_y = [] + for losse in losses: + predicted_proba_y.append((np.exp(losse) / np.sum(np.exp(losse)))) + predicted_proba_y = np.array(predicted_proba_y) + elif decision_method == "logarithmic_loss": + # Scaling predictions from [0,1] range to [-1,1] + predictions = predictions * 2 - 1 - predicted_proba_y = np.empty([(predictions.shape[0]), (predictions.shape[1] + 1)]) + # Transforming from binary problems to the original problem + losses = self._logarithmic_loss(predictions) + losses = 1 / losses.astype(float) + predicted_proba_y = [] + for losse in losses: + predicted_proba_y.append((np.exp(losse) / np.sum(np.exp(losse)))) + predicted_proba_y = np.array(predicted_proba_y) - # Probabilities of each set to belong to the first ordinal class - predicted_proba_y[:,0] = 1 - predictions[:,0] + elif decision_method == "frank_hall": + # Transforming from binary problems to the original problem + predicted_proba_y = self._frank_hall_method(predictions) - # Probabilities for the central classes - predicted_proba_y[:,1:-1] = predictions[:,:-1] - predictions[:,1:] + else: + raise AttributeError( + 'The specified loss method "%s" is not implemented' % decision_method + ) + + return predicted_proba_y + + def _coding_matrix(self, dtype, n_classes): + """ + Method that returns the coding matrix for a given dataset. + + Parameters + ---------- + + dtype: string + Type of decomposition to be performed by classifier. 
+ + n_classes: int + Number of different classes in actual dataset + + Returns + ------- + + coding_matrix: array-like, shape (n_targets, n_targets-1) + Each value must be in range {-1, 1, 0}, whether that class + will belong to negative class, positive class or will not + be used for that particular binary classifier. + """ - # Probabilities of each set to belong to the last class - predicted_proba_y[:,-1] = predictions[:,-1] + if dtype == "ordered_partitions": + coding_matrix = np.triu((-2 * np.ones(n_classes - 1))) + 1 + coding_matrix = np.vstack([coding_matrix, np.ones((1, n_classes - 1))]) - return predicted_proba_y + elif dtype == "one_vs_next": + plus_ones = np.diagflat(np.ones((1, n_classes - 1), dtype=int), -1) + minus_ones = -(np.eye(n_classes, n_classes - 1, dtype=int)) + coding_matrix = minus_ones + plus_ones[:, :-1] + + elif dtype == "one_vs_followers": + minus_ones = np.diagflat(-np.ones((1, n_classes), dtype=int)) + plus_ones = np.tril(np.ones(n_classes), -1) + coding_matrix = (plus_ones + minus_ones)[:, :-1] + + elif dtype == "one_vs_previous": + plusones = np.triu(np.ones(n_classes)) + minusones = -np.diagflat(np.ones((1, n_classes - 1)), -1) + coding_matrix = np.flip((plusones + minusones)[:, :-1], axis=1) + + else: + raise ValueError("Decomposition type %s does not exist" % dtype) + + return coding_matrix.astype(int) + + def _get_predictions(self, X): + """ + For each pattern inside the dataset X, this method returns + the probability for that pattern to belong to the positive + class. There will be as many predictions (columns) as different + binary classifiers have been fitted previously. + + Parameters + ---------- + + X: {array-like, sparse matrix}, shape (n_samples, n_features) + + Returns + ------- + + predictions: array, shape (n_samples, n_targets-1) + """ + + predictions = np.array( + list(map(lambda c: c.predict_proba(X)[:, 1], self.classifiers_)) + ).T + + return predictions + + def _exponential_loss(self, predictions): + """ + Computation of the exponential losses for each label of the + original ordinal multinomial problem. Transforms from n-1 + binary subproblems to the original ordinal problem with + n targets. + + Parameters + ---------- + + predictions: array, shape (n_samples, n_targets-1) + + Returns + ------- + + e_losses: array, shape (n_samples, n_targets) + Exponential losses for each sample of dataset X. One + different value for each class label. + """ + + # Computing exponential losses + e_losses = np.zeros((predictions.shape[0], (predictions.shape[1] + 1))) + for i in range(predictions.shape[1] + 1): + e_losses[:, i] = np.sum( + np.exp( + -predictions + * np.tile(self.coding_matrix_[i, :], (predictions.shape[0], 1)) + ), + axis=1, + ) + + return e_losses + + def _hinge_loss(self, predictions): + """ + Computation of the Hinge losses for each label of the + original ordinal multinomial problem. Transforms from n-1 + binary subproblems to the original ordinal problem with + n targets. + + Parameters + ---------- + + predictions: array, shape (n_samples, n_targets-1) + + Returns + ------- + + hLosses: array, shape (n_samples, n_targets) + Hinge losses for each sample of dataset X. One + different value for each class label. 
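To make the loss-based decision rules concrete, a hand-worked sketch of the exponential rule for a single sample and the five-class ordered-partitions matrix (numbers invented; the predictions are assumed to be already rescaled to [-1, 1] as done in predict):

import numpy as np

# Hypothetical scaled outputs of the four binary models for one sample
predictions = np.array([[0.9, 0.7, -0.4, -0.8]])

# Ordered-partitions coding matrix for five classes (rows = classes)
M = np.vstack([np.triu(-2 * np.ones(4)) + 1, np.ones((1, 4))]).astype(int)

# Exponential loss of each class: sum_j exp(-prediction_j * M[class, j])
losses = np.exp(-predictions * M).sum(axis=1)
print(losses.argmin())   # 2 -> the third class wins for these numbers

# predict_proba turns the same losses into pseudo-probabilities with a
# softmax over their reciprocals
inv = 1.0 / losses
print(np.exp(inv) / np.exp(inv).sum())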
+ + """ + + # Computing Hinge losses + h_losses = np.zeros((predictions.shape[0], (predictions.shape[1] + 1))) + for i in range(predictions.shape[1] + 1): + h_losses[:, i] = np.sum( + np.maximum( + 0, + ( + 1 + - np.tile(self.coding_matrix_[i, :], (predictions.shape[0], 1)) + * predictions + ), + ), + axis=1, + ) + + return h_losses + + def _logarithmic_loss(self, predictions): + """ + Computation of the logarithmic losses for each label of the + original ordinal multinomial problem. Transforms from n-1 + binary subproblems to the original ordinal problem with + n targets. + + Parameters + ---------- + + predictions: array, shape (n_samples, n_targets-1) + + Returns + ------- + + eLosses: array, shape (n_samples, n_targets) + logarithmic losses for each sample of dataset X. One + different value for each class label. + + """ + + # Computing logarithmic losses + l_losses = np.zeros((predictions.shape[0], (predictions.shape[1] + 1))) + for i in range(predictions.shape[1] + 1): + l_losses[:, i] = np.sum( + np.log( + 1 + + np.exp( + -2 + * np.tile(self.coding_matrix_[i, :], (predictions.shape[0], 1)) + * predictions + ) + ), + axis=1, + ) + + return l_losses + + def _frank_hall_method(self, predictions): + """ + Returns the probability for each pattern of dataset to + belong to each one of the original targets. Transforms from n-1 + subproblems to the original ordinal problem with n targets. + + Parameters + ---------- + + predictions: array, shape (n_samples, n_targets-1) + + Returns + ------- + + predicted_proba_y: array, shape (n_samples, n_targets) + Class labels predicted for samples in dataset X. + """ + + if self.dtype.lower() != "ordered_partitions": + raise AttributeError( + "When using Frank and Hall decision method,\ + ordered_partitions must be used" + ) + + predicted_proba_y = np.empty( + [(predictions.shape[0]), (predictions.shape[1] + 1)] + ) + + # Probabilities of each set to belong to the first ordinal class + predicted_proba_y[:, 0] = 1 - predictions[:, 0] + + # Probabilities for the central classes + predicted_proba_y[:, 1:-1] = predictions[:, :-1] - predictions[:, 1:] + + # Probabilities of each set to belong to the last class + predicted_proba_y[:, -1] = predictions[:, -1] + + return predicted_proba_y diff --git a/orca_python/classifiers/REDSVM.py b/orca_python/classifiers/REDSVM.py index 2130ac5..698bc7f 100644 --- a/orca_python/classifiers/REDSVM.py +++ b/orca_python/classifiers/REDSVM.py @@ -1,140 +1,137 @@ # encoding: utf-8 import numpy as np from sklearn.base import BaseEstimator, ClassifierMixin -from sklearn.utils.validation import check_X_y, check_array, check_is_fitted from sklearn.utils.multiclass import unique_labels - -# from .libsvmRank.python import svm +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y from orca_python.classifiers.libsvmRank.python import svm +# from .libsvmRank.python import svm + class REDSVM(BaseEstimator, ClassifierMixin): - """ - REDSVM Reduction from ordinal regression to binary SVM classifiers [1]. + """ + REDSVM Reduction from ordinal regression to binary SVM classifiers [1]. The configuration used is the identity coding matrix, the absolute cost matrix and the standard binary soft-margin SVM. This class uses libsvm-rank-2.81 implementation (http://www.work.caltech.edu/~htlin/program/libsvm/) - - REDSVM methods: - fit - Fits a model from training data - predict - Performs label prediction - - References: - [1] H.-T. Lin and L. 
Li, "Reduction from cost-sensitive ordinal - ranking to weighted binary classification" Neural Computation, - vol. 24, no. 5, pp. 1329-1367, 2012. - http://10.1162/NECO_a_00265 - [2] P.A. Gutiérrez, M. Pérez-Ortiz, J. Sánchez-Monedero, - F. Fernández-Navarro and C. Hervás-Martínez - Ordinal regression methods: survey and experimental study - IEEE Transactions on Knowledge and Data Engineering, Vol. 28. Issue 1 - 2016 - http://dx.doi.org/10.1109/TKDE.2015.2457911 - - Model Parameters: - t kernel_type : set type of kernel function (default 2) - 0 -- linear: u'*v - 1 -- polynomial: (gamma*u'*v + coef0)^degree - 2 -- radial basis function: exp(-gamma*|u-v|^2) - 3 -- sigmoid: tanh(gamma*u'*v + coef0) - 4 -- stump: -|u-v|_1 + coef0\n" - 5 -- perceptron: -|u-v|_2 + coef0\n" - 6 -- laplacian: exp(-gamma*|u-v|_1)\n" - 7 -- exponential: exp(-gamma*|u-v|_2)\n" - 8 -- precomputed kernel (kernel values in training_instance_matrix) - d degree : set degree in kernel function (default 3) - g gamma : set gamma in kernel function (default 1/num_features) - r coef0 : set coef0 in kernel function (default 0) - c cost : set the parameter C (default 1) - m cachesize : set cache memory size in MB (default 100) - e epsilon : set tolerance of termination criterion (default 0.001) - h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1) - q : quiet mode (no outputs) - """ - - #Set parameters values - def __init__(self, t=2, d=3, g=None, r=0, c=1, m=100, e=0.001, h=1): - - self.t = t - self.d = d - self.g = g - self.r = r - self.c = c - self.m = m - self.e = e - self.h = h - - - def fit(self, X, y): - - """ - Fit the model with the training data - - Parameters - ---------- - - X: {array-like, sparse matrix}, shape (n_samples, n_features) - Training patterns array, where n_samples is the number of samples - and n_features is the number of features - - y: array-like, shape (n_samples) - Target vector relative to X - - Returns - ------- - - self: object - """ - - # Check that X and y have correct shape - X, y = check_X_y(X, y) - # Store the classes seen during fit - self.classes_ = unique_labels(y) - - #Set the default g value if necessary - if self.g == None: - self.g = 1 / np.size(X, 1) - - # Fit the model - options = "-s 5 -t {} -d {} -g {} -r {} -c {} -m {} -e {} -h {} -q".format(str(self.t), - str(self.d), - str(self.g), - str(self.r), - str(self.c), - str(self.m), - str(self.e), - str(self.h)) - self.classifier_ = svm.fit(y.tolist(), X.tolist(), options) - - return self - - - def predict(self, X): - - """ - Performs classification on samples in X - - Parameters - ---------- - - X : {array-like, sparse matrix}, shape (n_samples, n_features) - - Returns - ------- - - predicted_y : array, shape (n_samples,) - Class labels for samples in X. - """ - - # Check is fit had been called - check_is_fitted(self, ['classifier_']) - - # Input validation - X = check_array(X) - - predicted_y = svm.predict(X.tolist(), self.classifier_) - - return predicted_y + + REDSVM methods: + fit - Fits a model from training data + predict - Performs label prediction + + References: + [1] H.-T. Lin and L. Li, "Reduction from cost-sensitive ordinal + ranking to weighted binary classification" Neural Computation, + vol. 24, no. 5, pp. 1329-1367, 2012. + http://10.1162/NECO_a_00265 + [2] P.A. Gutiérrez, M. Pérez-Ortiz, J. Sánchez-Monedero, + F. Fernández-Navarro and C. Hervás-Martínez + Ordinal regression methods: survey and experimental study + IEEE Transactions on Knowledge and Data Engineering, Vol. 28. 
+ Issue 1 2016 + http://dx.doi.org/10.1109/TKDE.2015.2457911 + + Model Parameters: + t kernel_type : set type of kernel function (default 2) + 0 -- linear: u'*v + 1 -- polynomial: (gamma*u'*v + coef0)^degree + 2 -- radial basis function: exp(-gamma*|u-v|^2) + 3 -- sigmoid: tanh(gamma*u'*v + coef0) + 4 -- stump: -|u-v|_1 + coef0\n" + 5 -- perceptron: -|u-v|_2 + coef0\n" + 6 -- laplacian: exp(-gamma*|u-v|_1)\n" + 7 -- exponential: exp(-gamma*|u-v|_2)\n" + 8 -- precomputed kernel (kernel values in training_instance_matrix) + d degree : set degree in kernel function (default 3) + g gamma : set gamma in kernel function (default 1/num_features) + r coef0 : set coef0 in kernel function (default 0) + c cost : set the parameter C (default 1) + m cachesize : set cache memory size in MB (default 100) + e epsilon : set tolerance of termination criterion (default 0.001) + h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1) + q : quiet mode (no outputs) + """ + + # Set parameters values + def __init__(self, t=2, d=3, g=None, r=0, c=1, m=100, e=0.001, h=1): + self.t = t + self.d = d + self.g = g + self.r = r + self.c = c + self.m = m + self.e = e + self.h = h + + def fit(self, X, y): + """ + Fit the model with the training data + + Parameters + ---------- + + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features + + y: array-like, shape (n_samples) + Target vector relative to X + + Returns + ------- + + self: object + """ + + # Check that X and y have correct shape + X, y = check_X_y(X, y) + # Store the classes seen during fit + self.classes_ = unique_labels(y) + + # Set the default g value if necessary + if self.g is None: + self.g = 1 / np.size(X, 1) + + # Fit the model + options = "-s 5 -t {} -d {} -g {} -r {} -c {} -m {} -e {} -h {} -q".format( + str(self.t), + str(self.d), + str(self.g), + str(self.r), + str(self.c), + str(self.m), + str(self.e), + str(self.h), + ) + self.classifier_ = svm.fit(y.tolist(), X.tolist(), options) + + return self + + def predict(self, X): + """ + Performs classification on samples in X + + Parameters + ---------- + + X : {array-like, sparse matrix}, shape (n_samples, n_features) + + Returns + ------- + + predicted_y : array, shape (n_samples,) + Class labels for samples in X. + """ + + # Check is fit had been called + check_is_fitted(self, ["classifier_"]) + + # Input validation + X = check_array(X) + + predicted_y = svm.predict(X.tolist(), self.classifier_) + + return predicted_y diff --git a/orca_python/classifiers/RegressorWrapper.py b/orca_python/classifiers/RegressorWrapper.py new file mode 100644 index 0000000..b0002b8 --- /dev/null +++ b/orca_python/classifiers/RegressorWrapper.py @@ -0,0 +1,123 @@ +import numpy as np +from sklearn.base import BaseEstimator, ClassifierMixin +from sklearn.utils.validation import check_array, check_is_fitted, check_X_y + + +class RegressorWrapper(BaseEstimator, ClassifierMixin): + """ + Regression algorithms wrapper + + The mainly purpose of this class is create a generic wrapper which could + obtains ordinal models by regression algorithms, the targets for the independent + variable could be provided by the users and it works all the regression algorithms + avaliable in sklearn. + + Parameters + ------------ + + classifier: sklearn regressor + Base regressor used to build de model. this need to be a sklearn regressor. 
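A small usage sketch of the wrapper on invented data; every keyword argument other than base_regressor is forwarded to the underlying scikit-learn regressor:

import numpy as np

from orca_python.classifiers import RegressorWrapper

# Illustrative ordinal data with integer labels 1..4
X = np.linspace(0, 1, 40).reshape(-1, 1)
y = np.repeat([1, 2, 3, 4], 10)

clf = RegressorWrapper(base_regressor="sklearn.svm.SVR", C=10.0, gamma="scale")
clf.fit(X, y)

# Continuous predictions are rounded and clipped to the observed label range
print(clf.predict(X[:5]))   # integer labels within [1, 4]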
+ + labels: String[] + Array which include the labels choosed by the user to transform the continous + data into nominal data, if users does not specify the labels by himself the + method will use a predefined values + + params: String + path of the Json file from where the method load the configuration for sklearn + regressor in case of the user do not incluide it the regressor will use the + default value by sklearn. + + + """ + + def __init__(self, base_regressor=None, **params): + self.params = params + self.classifier_ = None + + self.base_regressor = base_regressor + if self.base_regressor is None: + self.base_regressor = "sklearn.svm.SVR" + + classifier = __import__(self.base_regressor.rsplit(".", 1)[0], fromlist="None") + classifier = getattr(classifier, self.base_regressor.rsplit(".", 1)[1]) + self.classifier_ = classifier(**self.params) + + self.classes_ = None + + def fit(self, X, y, **params): + """ + Fit the model with the training data and set the kwargs for the regressor. + + Parameters + ---------- + + X: {array-like, sparse matrix}, shape (n_samples, n_features) + Training patterns array, where n_samples is the number of samples + and n_features is the number of features + + y: array-like, shape (n_samples) + Target vector relative to X + + Returns + ------- + + self: object + """ + X, y = check_X_y(X, y) + self.classes_ = np.unique(y) + + self.classifier_.fit(X, y, **params) + return self + + def predict(self, X): + """ + Performs classification on samples in X + + Parameters + ---------- + + X : {array-like, sparse matrix}, shape (n_samples, n_features) + + Returns + ------- + + predicted_y : array, shape (n_samples,) + Class labels for samples in X. + """ + check_is_fitted(self, ["classifier_"]) + X = check_array(X) + + predicted_y = self.classifier_.predict(X) + predicted_y = np.clip( + np.round(predicted_y, 0), self.classes_[0], self.classes_[-1] + ) + return np.asarray(predicted_y, dtype=int) + + def set_params(self, **kwargs): + if not kwargs["base_regressor"]: + self.base_regressor = "sklearn.svm.SVR" + else: + self.base_regressor = kwargs["base_regressor"] + kwargs.pop("base_regressor") + + classifier = __import__(self.base_regressor.rsplit(".", 1)[0], fromlist="None") + classifier = getattr(classifier, self.base_regressor.rsplit(".", 1)[1]) + self.classifier_ = classifier(**kwargs) + + return self + + def get_params(self, deep=True): + # This function is overrided to get the params of the internal regressor. 
+ out = dict() + keys = self._get_param_names() + list(self.classifier_.get_params().keys()) + for num, key in enumerate(keys): + if num < len(self._get_param_names()): + value = getattr(self, key) + else: + value = getattr(self.classifier_, key) + if deep and hasattr(value, "get_params") and not isinstance(value, type): + deep_items = value.get_params().items() + out.update((key + "__" + k, val) for k, val in deep_items) + out[key] = value + return out diff --git a/orca_python/classifiers/__init__.py b/orca_python/classifiers/__init__.py index 25ac12c..b7f9760 100644 --- a/orca_python/classifiers/__init__.py +++ b/orca_python/classifiers/__init__.py @@ -1,10 +1,11 @@ - __all__ = [ "NNOP", "NNPOM", "OrdinalDecomposition", "REDSVM", "SVOREX", + "CSSVC", + "RegressorWrapper", ] from orca_python.classifiers.NNOP import NNOP @@ -12,4 +13,5 @@ from orca_python.classifiers.OrdinalDecomposition import OrdinalDecomposition from orca_python.classifiers.REDSVM import REDSVM from orca_python.classifiers.SVOREX import SVOREX - +from orca_python.classifiers.CSSVC import CSSVC +from orca_python.classifiers.RegressorWrapper import RegressorWrapper diff --git a/orca_python/metrics.py b/orca_python/metrics.py index acc7f56..5b7394d 100644 --- a/orca_python/metrics.py +++ b/orca_python/metrics.py @@ -5,186 +5,199 @@ from sklearn.metrics import confusion_matrix import scipy.stats + def greater_is_better(metric_name): - """ - Determines if greater values for one metric represent a better - classification rate or vice versa. Needed when declaring a - new scorer through make_scorer from sklearn. - """ + """ + Determines if greater values for one metric represent a better + classification rate or vice versa. Needed when declaring a + new scorer through make_scorer from sklearn. + """ - greater_is_better_metrics = ["ccr", "ms", "gm", "tkendall", "wkappa", "spearman"] - if metric_name in greater_is_better_metrics: - return True - else: - return False + greater_is_better_metrics = ["ccr", "ms", "gm", "tkendall", "wkappa", "spearman"] + if metric_name in greater_is_better_metrics: + return True + else: + return False def ccr(y, ypred): - """ - CCR - Correctly Classified Ratio + """ + CCR - Correctly Classified Ratio + + Also named Accuracy, it's the percentage of well + classified patterns among all patterns from a set. + """ - Also named Accuracy, it's the percentage of well - classified patterns among all patterns from a set. - """ + return np.count_nonzero(y == ypred) / float(len(y)) - return np.count_nonzero(y == ypred) / float( len(y) ) def amae(y, ypred): - """ - AMAE - Average MAE - - Mean of the MAE metric among classes. - """ - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - cm = confusion_matrix(y, ypred) - n_class = cm.shape[0] - costs = np.reshape(np.tile(range(n_class),n_class),(n_class,n_class)) - costs = np.abs(costs - np.transpose(costs)) - errores = costs*cm - amaes = np.sum(errores,axis=1)/np.sum(cm,axis=1).astype('double') - amaes = amaes[~np.isnan(amaes)] - return np.mean(amaes) + """ + AMAE - Average MAE + + Mean of the MAE metric among classes. 
+ """ + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cm = confusion_matrix(y, ypred) + n_class = cm.shape[0] + costs = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class)) + costs = np.abs(costs - np.transpose(costs)) + errores = costs * cm + amaes = np.sum(errores, axis=1) / np.sum(cm, axis=1).astype("double") + amaes = amaes[~np.isnan(amaes)] + return np.mean(amaes) + def gm(y, ypred): - """ + """ - GM - Geometric Mean + GM - Geometric Mean - Geometric mean of the sensitivy (accuracy) for each class - - """ + Geometric mean of the sensitivy (accuracy) for each class + + """ + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cm = confusion_matrix(y, ypred) + sum_byclass = np.sum(cm, axis=1) + sensitivities = np.diag(cm) / sum_byclass.astype("double") + sensitivities[sum_byclass == 0] = 1 + gm_result = pow(np.prod(sensitivities), 1.0 / cm.shape[0]) + return gm_result - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - cm = confusion_matrix(y, ypred) - sum_byclass = np.sum(cm,axis=1) - sensitivities = np.diag(cm)/sum_byclass.astype('double') - sensitivities[sum_byclass==0] = 1 - gm_result = pow(np.prod(sensitivities),1.0/cm.shape[0]) - return gm_result def mae(y, ypred): - """ - MAE - Mean Absolute Error + """ + MAE - Mean Absolute Error - Average absolute deviation of the predicted class - from the actual true class. - """ + Average absolute deviation of the predicted class + from the actual true class. + """ + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + y = np.asarray(y) + ypred = np.asarray(ypred) + return abs(y - ypred).sum() / len(y) - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - y = np.asarray(y) - ypred = np.asarray(ypred) - return abs(y - ypred).sum() / len(y) def mmae(y, ypred): - """ - MMAE - Maximum MAE - - MAE value of the class with higher distance from the - true values to the predicted ones. - """ - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - cm = confusion_matrix(y, ypred) - n_class = cm.shape[0] - costes=np.reshape(np.tile(range(n_class),n_class),(n_class,n_class)) - costes = np.abs(costes - np.transpose(costes)) - errores = costes*cm - amaes = np.sum(errores,axis=1)/np.sum(cm,axis=1).astype('double') - amaes = amaes[~np.isnan(amaes)] - return amaes.max() + """ + MMAE - Maximum MAE + + MAE value of the class with higher distance from the + true values to the predicted ones. + """ + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cm = confusion_matrix(y, ypred) + n_class = cm.shape[0] + costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class)) + costes = np.abs(costes - np.transpose(costes)) + errores = costes * cm + amaes = np.sum(errores, axis=1) / np.sum(cm, axis=1).astype("double") + amaes = amaes[~np.isnan(amaes)] + return amaes.max() + def ms(y, ypred): - """ - MS - Minimum Sensitivity + """ + MS - Minimum Sensitivity + + Lowest percentage of patterns correctly predicted as + belonging to each class, with respect to the total number + of examples in the corresponding class. + """ - Lowest percentage of patterns correctly predicted as - belonging to each class, with respect to the total number - of examples in the corresponding class. 
- """ + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + cm = confusion_matrix(y, ypred) + sum_byclass = np.sum(cm, axis=1) + sensitivities = np.diag(cm) / sum_byclass.astype("double") + sensitivities[sum_byclass == 0] = 1 + ms = np.min(sensitivities) - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - cm = confusion_matrix(y, ypred) - sum_byclass = np.sum(cm,axis=1) - sensitivities = np.diag(cm)/sum_byclass.astype('double') - sensitivities[sum_byclass==0] = 1 - ms = np.min(sensitivities) + return ms - return ms def mze(y, ypred): - """ - MZE - Mean Zero-one Error + """ + MZE - Mean Zero-one Error - Better known as error rate, is the complementary measure of CCR. - """ + Better known as error rate, is the complementary measure of CCR. + """ - with warnings.catch_warnings(): - warnings.simplefilter("ignore") + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + confusion = confusion_matrix(y, ypred) + return 1 - np.diagonal(confusion).sum() / confusion.sum() - confusion = confusion_matrix(y, ypred) - return 1 - np.diagonal(confusion).sum() / confusion.sum() def tkendall(y, ypred): - """ - The Kendalls t is a statistic used to measure - the association between two measured quantities. - It is a measure of rank correlation. - """ + """ + The Kendalls t is a statistic used to measure + the association between two measured quantities. + It is a measure of rank correlation. + """ - with warnings.catch_warnings(): - warnings.simplefilter("ignore") + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + corr, pvalue = scipy.stats.kendalltau(y, ypred) + return corr - corr, pvalue = scipy.stats.kendalltau(y, ypred) - return corr def wkappa(y, ypred): - """ - The Weighted Kappa is a modified version of the Kappa - statistic calculated to allow as signing different weights - to different levels of aggregation between two variables. - """ - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - - cm = confusion_matrix(y, ypred) - n_class = cm.shape[0] - costes=np.reshape(np.tile(range(n_class),n_class),(n_class,n_class)) - costes = np.abs(costes - np.transpose(costes)) - f = 1 - costes - - n = cm.sum() - x = cm/n - - r = x.sum(axis=1) # Row sum - s = x.sum(axis=0) # Col sum - Ex = r.reshape(-1, 1) * s - po = (x * f).sum() - pe = (Ex * f).sum() - return (po - pe) / (1 - pe) + """ + The Weighted Kappa is a modified version of the Kappa + statistic calculated to allow as signing different weights + to different levels of aggregation between two variables. + """ + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + cm = confusion_matrix(y, ypred) + n_class = cm.shape[0] + costes = np.reshape(np.tile(range(n_class), n_class), (n_class, n_class)) + costes = np.abs(costes - np.transpose(costes)) + f = 1 - costes + + n = cm.sum() + x = cm / n + + r = x.sum(axis=1) # Row sum + s = x.sum(axis=0) # Col sum + Ex = r.reshape(-1, 1) * s + po = (x * f).sum() + pe = (Ex * f).sum() + return (po - pe) / (1 - pe) + def spearman(y, ypred): - """ - The Spearmans rank correlation coefficient is - a non-parametric measure of statistical dependence - between two variables. 
- """ - - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - - n = len(y) - num = ((y - np.repeat(np.mean(y), n)) * (ypred - np.repeat(np.mean(ypred), n))).sum() - div = np.sqrt((pow(y - np.repeat(np.mean(y), n), 2)).sum() - * (pow(ypred - np.repeat(np.mean(ypred), n), 2)).sum()) - - if num == 0: - return 0 - else: - return num / div + """ + The Spearmans rank correlation coefficient is + a non-parametric measure of statistical dependence + between two variables. + """ + + with warnings.catch_warnings(): + warnings.simplefilter("ignore") + + n = len(y) + num = ( + (y - np.repeat(np.mean(y), n)) * (ypred - np.repeat(np.mean(ypred), n)) + ).sum() + div = np.sqrt( + (pow(y - np.repeat(np.mean(y), n), 2)).sum() + * (pow(ypred - np.repeat(np.mean(ypred), n), 2)).sum() + ) + + if num == 0: + return 0 + else: + return num / div diff --git a/orca_python/results.py b/orca_python/results.py index c163105..e6c7197 100644 --- a/orca_python/results.py +++ b/orca_python/results.py @@ -9,247 +9,270 @@ class Results: - """ - Results - - Class that handles all information from an experiment that needs - to be saved. This info will be saved into an specified folder. - - Attributes - ---------- - - _experiment_folder: string - Path where all the information about the actual experiment - will be saved. This folder will have the next format: - 'exp-YY-MM-DD-hh-mm-ss'. - """ - - - def __init__(self, output_folder): - - - # Getting experiment's folder name - folder_name = "exp-" + date.today().strftime("%y-%m-%d") \ - + "-" + datetime.now().strftime("%H-%M-%S") - - self._experiment_folder = os.path.join(output_folder, folder_name) - - - - def add_record(self, partition, best_params, best_model, configuration, metrics, predictions): - - """ - Stores information obtained from the run of one partition. - - Parameters - ---------- - - partition: string - Partition's index. - - best_params: dictionary - Best hyper-parameter's values found for this configuration - and dataset during cross-validation. If an ensemble method - has been used, there'll exist a parameter called - 'parameters' that will store a dict with the best - hyper-parameters found for the base classifier. - Keys are the name of each parameter - - best_model: estimator - Best model created during cross-validation. - - configuration: dict - Dictionary containing the name used for this pair of - dataset and configuration. Keys are 'dataset' and - 'config'. - - metrics: dict of dictionaries - Dictionary containing the metrics for train and test for - this particular configuration. It contains computational - times for both of them as well. Keys are 'train' and 'test' - - predictions: dict of lists - Dictionary that stores train and test class predictions. - Keys are 'train' and 'test'. - - """ - - - dataset_folder = os.path.join(self._experiment_folder, (configuration['dataset'] + "-" - + configuration['config'])) - models_folder = os.path.join(dataset_folder, "models") - predictions_folder = os.path.join(dataset_folder, "predictions") - - # Creating folder for this dataset-configuration if necessary - if not os.path.exists(dataset_folder): - try: - os.makedirs(models_folder) - os.makedirs(predictions_folder) - - except OSError: - raise OSError("Could not create folder %s (or subfolders) to store results." - % dataset_folder) - - - # Saving partition model - model_filename = configuration['dataset'] + "-" + configuration['config'] + "." 
+ partition - with open(os.path.join(models_folder, model_filename), 'wb') as output: - pickle.dump(best_model, output) - - # Saving model predictions - pred_filename = configuration['dataset'] + "-" + configuration['config'] + "." + partition - np.savetxt(os.path.join(predictions_folder, 'train_' + pred_filename), - predictions['train'], fmt='%d') - - if predictions['test'] is not None: - np.savetxt(os.path.join(predictions_folder, 'test_' + pred_filename), - predictions['test'], fmt='%d') - - - dataframe_row = OrderedDict() - # Adding best parameters as first elements in row - for p_name, p_value in best_params.items(): - - """ - If some ensemble method has been used, then one of its - parameters will be a dictionary containing the best - parameters found for the base classifier. - """ - if isinstance(p_value, dict): - for (k, v) in p_value.items(): - dataframe_row[k] = v - else: - dataframe_row[p_name] = p_value - - - # Concatenating train and test metrics - for (tm_name, tm_value), (ts_name, ts_value) \ - in zip(metrics['train'].items(), metrics['test'].items()): - - dataframe_row[tm_name] = tm_value - dataframe_row[ts_name] = ts_value - - - # Adding row to existing DataFrame or creating new one - df_path = os.path.join(dataset_folder, (configuration['dataset'] + "-" - + configuration['config'] + ".csv")) - - df = pd.DataFrame([dataframe_row], index=[partition]) - if os.path.isfile(df_path): - - previous_df = pd.read_csv(df_path, index_col=[0]) - # df = previous_df.append(df) - df = pd.concat([previous_df, df], axis = 0) - - - # Saving DataFrame to file - df.to_csv(df_path) - - - - def save_summaries(self, metrics_names): - - """ - Method used to create a experiment summary, where each - dataset-configuration will be represented as a single row - of data, which will consist in the mean and standard deviation - for the different metric's values across partitions. - - Parameters - ---------- - - metrics_names: list of strings - List with the names of all metrics used during the - execution of the experiment. Includes comp. times. - - """ - - # Name of columns for summary dataframes - avg_index = [mn + '_mean' for mn in metrics_names] - std_index = [mn + '_std' for mn in metrics_names] - - train_summary = []; test_summary = []; summary_index = [] - - for folder in os.listdir(self._experiment_folder): - - df = pd.read_csv(os.path.join(self._experiment_folder, folder, folder + ".csv")) - - # Creating one entry per folder in summaries - tr_sr, ts_sr = self._create_summary(df, avg_index, std_index) - train_summary.append(tr_sr); test_summary.append(ts_sr) - summary_index.append(folder) - - - # Naming each row in datasets - train_summary = pd.concat(train_summary, axis=1).transpose() - train_summary.index = summary_index - test_summary = pd.concat(test_summary, axis=1).transpose() - test_summary.index = summary_index - - # Save summaries to csv - train_summary.to_csv(os.path.join(self._experiment_folder, "train_summary.csv")) - test_summary.to_csv(os.path.join(self._experiment_folder, "test_summary.csv")) - - - - - def _create_summary(self, df, avg_index, std_index): - - """ - Summarices information from a DataFrame into a single row. - - Parameters - ---------- - - df: DataFrame object - Dataframe representing one Dataset-Configuration. - Contains hyper-parameters, metric's scores and - computational times. 
- - avg_index: list of strings - Includes all names of metrics ending with '_mean' - - std_index: list of strings - Includes all names of metrics ending with '_std' - - Returns - ------- - - train_summary_row: DataFrame object - DataFrame with only one row, containing mean and - standard deviation for all metrics calculated - across partitions (including computational times). - Stores only train information. - - test_summary_row: DataFrame object - Stores only test information - """ - - # Dissociating train and test metrics - - # Number of parameters used in this configuration - n_parameters = len(df.columns) - len(avg_index)*2 - # Even columns from dataframe (train metrics) - train_df = df.iloc[:,n_parameters::2].copy() - # Odd columns (test metrics) - test_df = df.iloc[:,(n_parameters+1)::2].copy() - - - # Computing mean and standard deviation for metrics - train_avg, train_std = train_df.mean(), train_df.std() - test_avg, test_std = test_df.mean(), test_df.std() - # Naming indexes for summary dataframes - train_avg.index = avg_index; train_std.index = std_index - test_avg.index = avg_index; test_std.index = std_index - # Merging avg and std into one dataframe - train_summary_row = pd.concat([train_avg, train_std]) - test_summary_row = pd.concat([test_avg, test_std]) - - # Mixing avg and std DataFrame columns from metrics summaries - train_summary_row = train_summary_row[list(sum(zip(train_summary_row.iloc[:len(avg_index)].keys(), - train_summary_row.iloc[len(std_index):].keys()), ()))] - - test_summary_row = test_summary_row[list(sum(zip(test_summary_row.iloc[:len(avg_index)].keys(), - test_summary_row.iloc[len(std_index):].keys()), ()))] - - return train_summary_row, test_summary_row + """ + Results + + Class that handles all information from an experiment that needs + to be saved. This info will be saved into an specified folder. + + Attributes + ---------- + + _experiment_folder: string + Path where all the information about the actual experiment + will be saved. This folder will have the next format: + 'exp-YY-MM-DD-hh-mm-ss'. + """ + + def __init__(self, output_folder): + # Getting experiment's folder name + folder_name = ( + "exp-" + + date.today().strftime("%y-%m-%d") + + "-" + + datetime.now().strftime("%H-%M-%S") + ) + + self._experiment_folder = os.path.join(output_folder, folder_name) + + def add_record( + self, partition, best_params, best_model, configuration, metrics, predictions + ): + """ + Stores information obtained from the run of one partition. + + Parameters + ---------- + + partition: string + Partition's index. + + best_params: dictionary + Best hyper-parameter's values found for this configuration + and dataset during cross-validation. If an ensemble method + has been used, there'll exist a parameter called + 'parameters' that will store a dict with the best + hyper-parameters found for the base classifier. + Keys are the name of each parameter + + best_model: estimator + Best model created during cross-validation. + + configuration: dict + Dictionary containing the name used for this pair of + dataset and configuration. Keys are 'dataset' and + 'config'. + + metrics: dict of dictionaries + Dictionary containing the metrics for train and test for + this particular configuration. It contains computational + times for both of them as well. Keys are 'train' and 'test' + + predictions: dict of lists + Dictionary that stores train and test class predictions. + Keys are 'train' and 'test'. 
+ + """ + + dataset_folder = os.path.join( + self._experiment_folder, + (configuration["dataset"] + "-" + configuration["config"]), + ) + models_folder = os.path.join(dataset_folder, "models") + predictions_folder = os.path.join(dataset_folder, "predictions") + + # Creating folder for this dataset-configuration if necessary + if not os.path.exists(dataset_folder): + try: + os.makedirs(models_folder) + os.makedirs(predictions_folder) + + except OSError: + raise OSError( + "Could not create folder %s (or subfolders) to store results." + % dataset_folder + ) + + # Saving partition model + model_filename = ( + configuration["dataset"] + "-" + configuration["config"] + "." + partition + ) + with open(os.path.join(models_folder, model_filename), "wb") as output: + pickle.dump(best_model, output) + + # Saving model predictions + pred_filename = ( + configuration["dataset"] + "-" + configuration["config"] + "." + partition + ) + np.savetxt( + os.path.join(predictions_folder, "train_" + pred_filename), + predictions["train"], + fmt="%d", + ) + + if predictions["test"] is not None: + np.savetxt( + os.path.join(predictions_folder, "test_" + pred_filename), + predictions["test"], + fmt="%d", + ) + + dataframe_row = OrderedDict() + # Adding best parameters as first elements in row + for p_name, p_value in best_params.items(): + """ + If some ensemble method has been used, then one of its + parameters will be a dictionary containing the best + parameters found for the base classifier. + """ + if isinstance(p_value, dict): + for k, v in p_value.items(): + dataframe_row[k] = v + else: + dataframe_row[p_name] = p_value + + # Concatenating train and test metrics + for (tm_name, tm_value), (ts_name, ts_value) in zip( + metrics["train"].items(), metrics["test"].items() + ): + dataframe_row[tm_name] = tm_value + dataframe_row[ts_name] = ts_value + + # Adding row to existing DataFrame or creating new one + df_path = os.path.join( + dataset_folder, + (configuration["dataset"] + "-" + configuration["config"] + ".csv"), + ) + + df = pd.DataFrame([dataframe_row], index=[partition]) + if os.path.isfile(df_path): + previous_df = pd.read_csv(df_path, index_col=[0]) + # df = previous_df.append(df) + df = pd.concat([previous_df, df], axis=0) + + # Saving DataFrame to file + df.to_csv(df_path) + + def save_summaries(self, metrics_names): + """ + Method used to create a experiment summary, where each + dataset-configuration will be represented as a single row + of data, which will consist in the mean and standard deviation + for the different metric's values across partitions. + + Parameters + ---------- + + metrics_names: list of strings + List with the names of all metrics used during the + execution of the experiment. Includes comp. times. 
+ + """ + + # Name of columns for summary dataframes + avg_index = [mn + "_mean" for mn in metrics_names] + std_index = [mn + "_std" for mn in metrics_names] + + train_summary = [] + test_summary = [] + summary_index = [] + + for folder in os.listdir(self._experiment_folder): + df = pd.read_csv( + os.path.join(self._experiment_folder, folder, folder + ".csv") + ) + + # Creating one entry per folder in summaries + tr_sr, ts_sr = self._create_summary(df, avg_index, std_index) + train_summary.append(tr_sr) + test_summary.append(ts_sr) + summary_index.append(folder) + + # Naming each row in datasets + train_summary = pd.concat(train_summary, axis=1).transpose() + train_summary.index = summary_index + test_summary = pd.concat(test_summary, axis=1).transpose() + test_summary.index = summary_index + + # Save summaries to csv + train_summary.to_csv(os.path.join(self._experiment_folder, "train_summary.csv")) + test_summary.to_csv(os.path.join(self._experiment_folder, "test_summary.csv")) + + def _create_summary(self, df, avg_index, std_index): + """ + Summarices information from a DataFrame into a single row. + + Parameters + ---------- + + df: DataFrame object + Dataframe representing one Dataset-Configuration. + Contains hyper-parameters, metric's scores and + computational times. + + avg_index: list of strings + Includes all names of metrics ending with '_mean' + + std_index: list of strings + Includes all names of metrics ending with '_std' + + Returns + ------- + + train_summary_row: DataFrame object + DataFrame with only one row, containing mean and + standard deviation for all metrics calculated + across partitions (including computational times). + Stores only train information. + + test_summary_row: DataFrame object + Stores only test information + """ + + # Dissociating train and test metrics + + # Number of parameters used in this configuration + n_parameters = len(df.columns) - len(avg_index) * 2 + # Even columns from dataframe (train metrics) + train_df = df.iloc[:, n_parameters::2].copy() + # Odd columns (test metrics) + test_df = df.iloc[:, (n_parameters + 1) :: 2].copy() + + # Computing mean and standard deviation for metrics + train_avg, train_std = train_df.mean(), train_df.std() + test_avg, test_std = test_df.mean(), test_df.std() + # Naming indexes for summary dataframes + train_avg.index = avg_index + train_std.index = std_index + test_avg.index = avg_index + test_std.index = std_index + # Merging avg and std into one dataframe + train_summary_row = pd.concat([train_avg, train_std]) + test_summary_row = pd.concat([test_avg, test_std]) + + # Mixing avg and std DataFrame columns from metrics summaries + train_summary_row = train_summary_row[ + list( + sum( + zip( + train_summary_row.iloc[: len(avg_index)].keys(), + train_summary_row.iloc[len(std_index) :].keys(), + ), + (), + ) + ) + ] + + test_summary_row = test_summary_row[ + list( + sum( + zip( + test_summary_row.iloc[: len(avg_index)].keys(), + test_summary_row.iloc[len(std_index) :].keys(), + ), + (), + ) + ) + ] + + return train_summary_row, test_summary_row diff --git a/orca_python/utilities.py b/orca_python/utilities.py index daf4702..db50a21 100644 --- a/orca_python/utilities.py +++ b/orca_python/utilities.py @@ -22,552 +22,559 @@ class Utilities: - """ - Utilities + """ + Utilities - Class in charge of running an experiment over N datasets, where we - apply M different configurations over each dataset. 
+ Class in charge of running an experiment over N datasets, where we + apply M different configurations over each dataset. - Configurations are composed of a classifier method and different - parameters, where it may be multiple values for every one of them. + Configurations are composed of a classifier method and different + parameters, where it may be multiple values for every one of them. - Running the main function of this class will perform - cross-validation for each partition per dataset-configuration pairs, - obtaining the most optimal model, after what will be used to infere - the labels for the test sets. + Running the main function of this class will perform + cross-validation for each partition per dataset-configuration pairs, + obtaining the most optimal model, after what will be used to infere + the labels for the test sets. + + + Parameters + ---------- + + general_conf: dict + Dictionary containing values needed to run the experiment. + It gives this class information about where are located the + different datasets, which one are going to be tested, the + metrics to use, etc. + + configurations: dict + Dictionary in which are stated the different classifiers + to build methods upon the selected datasets, as well as + the different values for the hyper-parameters used to + optimize the model during cross-validation phase. + + verbose: boolean + Variable used for testing purposes. Silences all prints. + + For more usage information, read User Guide of this framework. + + + Attributes + ---------- + + _results: Results object + Class used to manage and store all information obtained + during the run of an experiment. + """ + + def __init__(self, general_conf, configurations, verbose=True): + self.general_conf = deepcopy(general_conf) + self.configurations = deepcopy(configurations) + self.verbose = verbose + + syspath.append("classifiers") + + def run_experiment(self): + """ + Runs an experiment. Main method of this framework. + + Loads all datasets, which can be fragmented in partitions. + Builds a model per partition, using cross-validation to find + the optimal values among the hyper-parameters to compare from. + + Uses the built model to get train and test metrics, storing all + the information into a Results object. 
+ """ + + self._results = Results(self.general_conf["output_folder"]) + + self._check_dataset_list() + self._check_params() + + if self.verbose: + print("\n###############################") + print("\tRunning Experiment") + print("###############################") + + # Iterating over Datasets + for x in self.general_conf["datasets"]: + dataset_name = x.strip() + dataset_path = os.path.join(self.general_conf["basedir"], dataset_name) + + dataset = self._load_dataset(dataset_path) + + if self.verbose: + print("\nRunning", dataset_name, "dataset") + print("--------------------------") + + # Iterating over Configurations + for conf_name, configuration in self.configurations.items(): + if self.verbose: + print("Running", conf_name, "...") + + print("Load_classifier") + classifier = load_classifier(configuration["classifier"]) + + # Iterating over partitions + for part_idx, partition in dataset: + if self.verbose: + print(" Running Partition", part_idx) + + # Normalization or Standardization of the partition if requested + if ( + self.general_conf["input_preprocessing"].strip().lower() + == "norm" + ): + ( + partition["train_inputs"], + partition["test_inputs"], + ) = self._normalize_data( + partition["train_inputs"], partition["test_inputs"] + ) + elif ( + self.general_conf["input_preprocessing"].strip().lower() + == "std" + ): + ( + partition["train_inputs"], + partition["test_inputs"], + ) = self._standardize_data( + partition["train_inputs"], partition["test_inputs"] + ) + + elif self.general_conf["input_preprocessing"].strip().lower() != "": + raise AttributeError( + "Input preprocessing named '%s' unknown" + % self.general_conf["input_preprocessing"].strip().lower() + ) + + optimal_estimator = self._get_optimal_estimator( + partition["train_inputs"], + partition["train_outputs"], + classifier, + configuration["parameters"], + ) + + # Getting train and test predictions + train_predicted_y = optimal_estimator.predict( + partition["train_inputs"] + ) + + test_predicted_y = None + elapsed = np.nan + if "test_outputs" in partition: + start = time() + test_predicted_y = optimal_estimator.predict( + partition["test_inputs"] + ) + elapsed = time() - start + + # Obtaining train and test metrics values. 
+ train_metrics = OrderedDict() + test_metrics = OrderedDict() + for metric_name in self.general_conf["metrics"]: + try: + # Loading metric from file + module = __import__("orca_python").metrics + metric = getattr(module, metric_name) + + except AttributeError: + raise AttributeError("No metric named '%s'" % metric_name) + + # Get train scores + train_score = metric( + partition["train_outputs"], train_predicted_y + ) + train_metrics[metric_name.strip() + "_train"] = train_score + + # Get test scores + test_metrics[metric_name.strip() + "_test"] = np.nan + if "test_outputs" in partition: + test_score = metric( + partition["test_outputs"], test_predicted_y + ) + test_metrics[metric_name.strip() + "_test"] = test_score + + # Cross-validation was performed to tune hyper-parameters + if isinstance(optimal_estimator, GridSearchCV): + train_metrics["cv_time_train"] = optimal_estimator.cv_results_[ + "mean_fit_time" + ].mean() + test_metrics["cv_time_test"] = optimal_estimator.cv_results_[ + "mean_score_time" + ].mean() + train_metrics["time_train"] = optimal_estimator.refit_time_ + test_metrics["time_test"] = elapsed + + else: + optimal_estimator.best_params_ = configuration["parameters"] + optimal_estimator.best_estimator_ = optimal_estimator + + train_metrics["cv_time_train"] = np.nan + test_metrics["cv_time_test"] = np.nan + train_metrics["time_train"] = optimal_estimator.refit_time_ + test_metrics["time_test"] = elapsed + + # Saving the results for this partition + self._results.add_record( + part_idx, + optimal_estimator.best_params_, + optimal_estimator.best_estimator_, + {"dataset": dataset_name, "config": conf_name}, + {"train": train_metrics, "test": test_metrics}, + {"train": train_predicted_y, "test": test_predicted_y}, + ) + + def _load_dataset(self, dataset_path): + """ + Loads all dataset's files, divided into train and test. + + Parameters + ---------- + + dataset_path: string + Path to dataset folder. + + + Returns + ------- + + partition_list: list of tuples + List of partitions found inside a dataset folder. + Each partition is stored into a dictionary, disjoining + train and test inputs and outputs. 
+ """ + + try: + # Creating dicts for all partitions (saving partition order as keys) + partition_list = { + filename[filename.find(".") + 1 :]: {} + for filename in os.listdir(dataset_path) + if filename.startswith("train_") + } + + # Loading each dataset + for filename in os.listdir(dataset_path): + if filename.startswith("train_"): + train_inputs, train_outputs = self._read_file( + os.path.join(dataset_path, filename) + ) + partition_list[filename[filename.find(".") + 1 :]][ + "train_inputs" + ] = train_inputs + partition_list[filename[filename.find(".") + 1 :]][ + "train_outputs" + ] = train_outputs + + elif filename.startswith("test_"): + test_inputs, test_outputs = self._read_file( + os.path.join(dataset_path, filename) + ) + partition_list[filename[filename.find(".") + 1 :]][ + "test_inputs" + ] = test_inputs + partition_list[filename[filename.find(".") + 1 :]][ + "test_outputs" + ] = test_outputs + + except OSError: + raise ValueError("No such file or directory: '%s'" % dataset_path) + + except KeyError: + raise RuntimeError( + "Found partition without train files: partition %s" + % filename[filename.find(".") + 1 :] + ) + + # Saving partitions as a sorted list of (index, partition) tuples + partition_list = sorted(partition_list.items(), key=(lambda t: get_key(t[0]))) + + return partition_list + + def _read_file(self, filename): + """ + Reads a CSV containing partitions, or full datasets. + Train and test files must be previously divided for + the experiment to run. + + Parameters + ---------- + + filename: string + Full path to train or test file. + + + Returns + ------- + + inputs: {array-like, sparse-matrix}, shape (n_samples, n_features) + Vector of sample's features. + + outputs: array-like, shape (n_samples) + Target vector relative to inputs. + + """ + + # Separator is automatically found + f = pd.read_csv(filename, header=None, engine="python", sep=None) + + inputs = f.values[:, 0:(-1)] + outputs = f.values[:, (-1)] + + return inputs, outputs + + def _check_dataset_list(self): + """ + Checks if there is some inconsistency in the dataset list. + It also simplifies running all datasets inside one folder. + + Parameters + ---------- + dataset_list: list of strings + list containing all the dataset names to run in a given + experiment. + If 'all' is specified without any other string, then all + datasets in basedir folder will be run. + """ + + base_path = self.general_conf["basedir"] + dataset_list = self.general_conf["datasets"] + + # Check if home path is shortened + if base_path.startswith("~"): + base_path = base_path.replace("~", os.path.expanduser("~"), 1) + + # Compatibility between python 2 and 3 + try: + basestring = (unicode, str) + except NameError: + basestring = str + + # Check if 'all' is the only value, and if it is, expand it + if len(dataset_list) == 1 and dataset_list[0] == "all": + dataset_list = [ + item + for item in os.listdir(base_path) + if os.path.isdir(os.path.join(base_path, item)) + ] + + elif not all(isinstance(item, basestring) for item in dataset_list): + raise ValueError("Dataset list can only contain strings") + + self.general_conf["basedir"] = base_path + self.general_conf["datasets"] = dataset_list + + def _normalize_data(self, train_data, test_data): + """ + Normalize the data. 
Test data normalization will be based on train data + + Parameters + ---------- + train_data: 2d array + contain the train data features + test_data: 2d array + contain the test data features + """ + + mm_scaler = preprocessing.MinMaxScaler().fit(train_data) + + return mm_scaler.transform(train_data), mm_scaler.transform(test_data) + + def _standardize_data(self, train_data, test_data): + """ + Standardize the data. Test data standardization will be based on train data + + Parameters + ---------- + train_data: 2d array + contain the train data features + test_data: 2d array + contain the test data features + """ + + std_scaler = preprocessing.StandardScaler().fit(train_data) + + return std_scaler.transform(train_data), std_scaler.transform(test_data) + + def _check_params(self): + """ + Checks if all given configurations are sintactly correct. + + Performs two different transformations over parameter + dictionaries when needed: + + - If one parameter's values are not inside a list, GridSearchCV + will not be able to handle them, so they must be enclosed into one. + + - When an ensemble method, as OrderedPartitions, is chosen as + classifier, transforms the dict of lists in which the + parameters for the internal classifier are stated into a list + of dicts (all possible combiantions of those different parameters). + """ + + random_seed = np.random.get_state()[1][0] + for _, conf in self.configurations.items(): + parameters = conf["parameters"] # Aliasing + + # Adding given seed as random_state value + if check_for_random_state(conf["classifier"]): + parameters["random_state"] = [random_seed] + + # An ensemble method is going to be used + if "parameters" in parameters and type(parameters["parameters"] == dict): + # Adding given seed as random_state value + if check_for_random_state(parameters["base_classifier"]): + parameters["parameters"]["random_state"] = [random_seed] + + try: + # Creating a list for each parameter. + # Elements represented as 'parameterName;parameterValue'. + p_list = [ + [p_name + ";" + str(v) for v in p] + for p_name, p in parameters["parameters"].items() + ] + # Permutations of all lists. Generates all possible + # combination of elements between lists. + p_list = [list(item) for item in list(product(*p_list))] + # Creates a list of dictionaries, containing all + # combinations of given parameters + p_list = [dict([item.split(";") for item in p]) for p in p_list] + + except TypeError: + raise TypeError("All parameters for base_classifier must be list") + + # Returns non-string values back to it's normal self + for d in p_list: + for k, v in d.items(): + try: + d[k] = literal_eval(v) + except ValueError: + pass + + parameters["parameters"] = p_list + + # No need to cross-validate when there is just one value per parameter + if all( + not isinstance(p, list) or len(p) == 1 for _, p in parameters.items() + ): + # Pop lonely values out of list + for p_name, p in parameters.items(): + if isinstance(p, list): + parameters[p_name] = p[0] + + else: + # Convert non-list values to lists + for p_name, p in parameters.items(): + if not isinstance(p, list) and not isinstance(p, dict): + parameters[p_name] = [p] + + def _get_optimal_estimator( + self, train_inputs, train_outputs, classifier, parameters + ): + """ + Perform cross-validation over one dataset and configuration. + + Each configuration consists of one classifier and none, one or + multiple hyper-parameters, that, in turn, can contain one or + multiple values used to optimize the resulting model. 
+ + At the end of cross-validation phase, the model with the + especific combination of values from the hyper-parameters + that achieved the best metrics from all the combinations + will remain. + + Parameters + ---------- + + train_inputs: {array-like, sparse-matrix}, shape (n_samples, n_features) + vector of features for each sample for this dataset. + + train_outputs: array-like, shape (n_samples) + Target vector relative to train_inputs. + + classifier: object + Class implementing a mathematical model able to be trained + and to perform predictions over given datasets. + + parameters: dictionary + Dictionary containing parameters to optimize as keys, + and the list of values that we want to compare as values. + + Returns + ------- + + optimal: GridSearchCV object or classifier object + An already fitted model of the given classifier, + with the best found parameters after cross-validation. + If cross-validation is not needed, it will return the + classifier model already trained. + """ + + # No need to cross-validate when there is just one value per parameter + if all(not isinstance(p, list) for k, p in parameters.items()): + optimal = classifier(**parameters) + + start = time() + optimal.fit(train_inputs, train_outputs) + elapsed = time() - start + + optimal.refit_time_ = elapsed + return optimal + + try: + module = __import__("orca_python").metrics + metric = getattr(module, self.general_conf["cv_metric"].lower().strip()) + + except AttributeError: + if not isinstance(self.general_conf["cv_metric"], str): + raise AttributeError("cv_metric must be string") + + raise AttributeError( + "No metric named '%s' implemented" + % self.general_conf["cv_metric"].strip().lower() + ) + + # Making custom metrics compatible with sklearn + gib = module.greater_is_better(self.general_conf["cv_metric"].lower().strip()) + scoring_function = make_scorer(metric, greater_is_better=gib) + + # Creating object to split train data for cross-validation + # This will make GridSearch have a pseudo-random behaviour + skf = StratifiedKFold( + n_splits=self.general_conf["hyperparam_cv_nfolds"], + shuffle=True, + random_state=np.random.get_state()[1][0], + ) + + # Performing cross-validation phase + optimal = GridSearchCV( + estimator=classifier(), + param_grid=parameters, + scoring=scoring_function, + n_jobs=self.general_conf["jobs"], + cv=skf, + ) + + optimal.fit(train_inputs, train_outputs) + + return optimal + + def write_report(self): + """ + Saves summarized information about experiment + through Results class. + """ + + if self.verbose: + print("\nSaving Results...") + # Names of each metric used (plus computational times) + metrics_names = [x.strip().lower() for x in self.general_conf["metrics"]] + [ + "cv_time", + "time", + ] - Parameters - ---------- - - general_conf: dict - Dictionary containing values needed to run the experiment. - It gives this class information about where are located the - different datasets, which one are going to be tested, the - metrics to use, etc. - - configurations: dict - Dictionary in which are stated the different classifiers - to build methods upon the selected datasets, as well as - the different values for the hyper-parameters used to - optimize the model during cross-validation phase. - - verbose: boolean - Variable used for testing purposes. Silences all prints. - - For more usage information, read User Guide of this framework. 
- - - Attributes - ---------- - - _results: Results object - Class used to manage and store all information obtained - during the run of an experiment. - """ - - - def __init__(self, general_conf, configurations, verbose=True): - - - self.general_conf = deepcopy(general_conf) - self.configurations = deepcopy(configurations) - self.verbose = verbose - - syspath.append('classifiers') - - - def run_experiment(self): - - """ - Runs an experiment. Main method of this framework. - - Loads all datasets, which can be fragmented in partitions. - Builds a model per partition, using cross-validation to find - the optimal values among the hyper-parameters to compare from. - - Uses the built model to get train and test metrics, storing all - the information into a Results object. - """ - - self._results = Results(self.general_conf['output_folder']) - - self._check_dataset_list() - self._check_params() - - - if self.verbose: - print("\n###############################") - print("\tRunning Experiment") - print("###############################") - - # Iterating over Datasets - for x in self.general_conf['datasets']: - - dataset_name = x.strip() - dataset_path = os.path.join(self.general_conf['basedir'], dataset_name) - - - dataset = self._load_dataset(dataset_path) - - if self.verbose: - print("\nRunning", dataset_name, "dataset") - print("--------------------------") - - - # Iterating over Configurations - for conf_name, configuration in self.configurations.items(): - - if self.verbose: - print("Running", conf_name, "...") - - - classifier = load_classifier(configuration["classifier"]) - - # Iterating over partitions - for part_idx, partition in dataset: - - if self.verbose: - print(" Running Partition", part_idx) - - - #Normalization or Standardization of the partition if requested - if self.general_conf['input_preprocessing'].strip().lower() == 'norm': - partition["train_inputs"], partition["test_inputs"] = self._normalize_data(partition["train_inputs"], partition["test_inputs"]) - elif self.general_conf['input_preprocessing'].strip().lower() == 'std': - partition["train_inputs"], partition["test_inputs"] = self._standardize_data(partition["train_inputs"], partition["test_inputs"]) - - elif self.general_conf['input_preprocessing'].strip().lower() != '': - raise AttributeError("Input preprocessing named '%s' unknown" % self.general_conf['input_preprocessing'].strip().lower()) - - optimal_estimator = self._get_optimal_estimator(partition["train_inputs"], - partition["train_outputs"], - classifier, - configuration["parameters"]) - - # Getting train and test predictions - train_predicted_y = optimal_estimator.predict(partition["train_inputs"]) - - test_predicted_y = None; elapsed = np.nan - if "test_outputs" in partition: - start = time() - test_predicted_y = optimal_estimator.predict(partition["test_inputs"]) - elapsed = time() - start - - - # Obtaining train and test metrics values. 
- train_metrics = OrderedDict(); test_metrics = OrderedDict() - for metric_name in self.general_conf['metrics']: - - try: - # Loading metric from file - module = __import__("orca_python").metrics - metric = getattr(module, self.general_conf['cv_metric'].lower().strip()) - - except AttributeError: - raise AttributeError("No metric named '%s'" - % metric_name.strip().lower()) - - # Get train scores - train_score = metric(partition["train_outputs"], train_predicted_y) - train_metrics[metric_name.strip() + '_train'] = train_score - - # Get test scores - test_metrics[metric_name.strip() + '_test'] = np.nan - if "test_outputs" in partition: - test_score = metric(partition["test_outputs"], test_predicted_y) - test_metrics[metric_name.strip() + '_test'] = test_score - - - # Cross-validation was performed to tune hyper-parameters - if isinstance(optimal_estimator, GridSearchCV): - train_metrics['cv_time_train'] = optimal_estimator.cv_results_['mean_fit_time'].mean() - test_metrics['cv_time_test'] = optimal_estimator.cv_results_['mean_score_time'].mean() - train_metrics['time_train'] = optimal_estimator.refit_time_ - test_metrics['time_test'] = elapsed - - - else: - optimal_estimator.best_params_ = configuration['parameters'] - optimal_estimator.best_estimator_ = optimal_estimator - - train_metrics['cv_time_train'] = np.nan - test_metrics['cv_time_test'] = np.nan - train_metrics['time_train'] = optimal_estimator.refit_time_ - test_metrics['time_test'] = elapsed - - - # Saving the results for this partition - self._results.add_record(part_idx, optimal_estimator.best_params_, - optimal_estimator.best_estimator_, - {'dataset': dataset_name, 'config': conf_name}, - {'train': train_metrics, 'test': test_metrics}, - {'train': train_predicted_y, 'test': test_predicted_y}) - - - - def _load_dataset(self, dataset_path): - - """ - Loads all dataset's files, divided into train and test. - - Parameters - ---------- - - dataset_path: string - Path to dataset folder. - - - Returns - ------- - - partition_list: list of tuples - List of partitions found inside a dataset folder. - Each partition is stored into a dictionary, disjoining - train and test inputs and outputs. 
- """ - - - try: - - # Creating dicts for all partitions (saving partition order as keys) - partition_list = {filename[filename.find('.') + 1:]: {} for filename - in os.listdir(dataset_path) - if filename.startswith("train_")} - - # Loading each dataset - for filename in os.listdir(dataset_path): - - if filename.startswith("train_"): - train_inputs, train_outputs = self._read_file(os.path.join(dataset_path, filename)) - partition_list[filename[filename.find('.') + 1:]]["train_inputs"] = train_inputs - partition_list[filename[filename.find('.') + 1:]]["train_outputs"] = train_outputs - - elif filename.startswith("test_"): - test_inputs, test_outputs = self._read_file(os.path.join(dataset_path, filename)) - partition_list[filename[filename.find('.') + 1:]]["test_inputs"] = test_inputs - partition_list[filename[filename.find('.') + 1:]]["test_outputs"] = test_outputs - - except OSError: - raise ValueError("No such file or directory: '%s'" % dataset_path) - - except KeyError: - raise RuntimeError("Found partition without train files: partition %s" - % filename[filename.find('.') + 1:]) - - - # Saving partitions as a sorted list of (index, partition) tuples - partition_list = sorted(partition_list.items(), key=(lambda t: get_key(t[0]))) - - return partition_list - - - - def _read_file(self, filename): - - """ - Reads a CSV containing partitions, or full datasets. - Train and test files must be previously divided for - the experiment to run. - - Parameters - ---------- - - filename: string - Full path to train or test file. - - - Returns - ------- - - inputs: {array-like, sparse-matrix}, shape (n_samples, n_features) - Vector of sample's features. - - outputs: array-like, shape (n_samples) - Target vector relative to inputs. - - """ - - # Separator is automatically found - f = pd.read_csv(filename, header=None, engine='python', sep=None) - - inputs = f.values[:,0:(-1)] - outputs = f.values[:,(-1)] - - return inputs, outputs - - - - def _check_dataset_list(self): - - """ - Checks if there is some inconsistency in the dataset list. - It also simplifies running all datasets inside one folder. - - Parameters - ---------- - dataset_list: list of strings - list containing all the dataset names to run in a given - experiment. - If 'all' is specified without any other string, then all - datasets in basedir folder will be run. - """ - - - base_path = self.general_conf['basedir'] - dataset_list = self.general_conf['datasets'] - - # Check if home path is shortened - if base_path.startswith("~"): - base_path = base_path.replace('~', os.path.expanduser('~'), 1) - - - # Compatibility between python 2 and 3 - try: - basestring = (unicode, str) - except NameError: - basestring = str - - # Check if 'all' is the only value, and if it is, expand it - if len(dataset_list) == 1 and dataset_list[0] == 'all': - - dataset_list = [item for item in os.listdir(base_path) \ - if os.path.isdir(os.path.join(base_path, item))] - - elif not all(isinstance(item, basestring) for item in dataset_list): - raise ValueError("Dataset list can only contain strings") - - - self.general_conf['basedir'] = base_path - self.general_conf['datasets'] = dataset_list - - - - def _normalize_data(self, train_data, test_data): - - """ - Normalize the data. 
Test data normalization will be based on train data - - Parameters - ---------- - train_data: 2d array - contain the train data features - test_data: 2d array - contain the test data features - """ - - - mm_scaler = preprocessing.MinMaxScaler().fit(train_data) - - return mm_scaler.transform(train_data), mm_scaler.transform(test_data) - - - - def _standardize_data(self, train_data, test_data): - - """ - Standardize the data. Test data standardization will be based on train data - - Parameters - ---------- - train_data: 2d array - contain the train data features - test_data: 2d array - contain the test data features - """ - - - std_scaler = preprocessing.StandardScaler().fit(train_data) - - return std_scaler.transform(train_data), std_scaler.transform(test_data) - - - - def _check_params(self): - - """ - Checks if all given configurations are sintactly correct. - - Performs two different transformations over parameter - dictionaries when needed: - - - If one parameter's values are not inside a list, GridSearchCV - will not be able to handle them, so they must be enclosed into one. - - - When an ensemble method, as OrderedPartitions, is chosen as - classifier, transforms the dict of lists in which the - parameters for the internal classifier are stated into a list - of dicts (all possible combiantions of those different parameters). - """ - - random_seed = np.random.get_state()[1][0] - for _, conf in self.configurations.items(): - - - parameters = conf['parameters'] # Aliasing - - # Adding given seed as random_state value - if check_for_random_state(conf['classifier']): - parameters['random_state'] = [random_seed] - - - # An ensemble method is going to be used - if 'parameters' in parameters and type(parameters['parameters'] == dict): - - # Adding given seed as random_state value - if check_for_random_state(parameters['base_classifier']): - parameters['parameters']['random_state'] = [random_seed] - - - try: - - # Creating a list for each parameter. - # Elements represented as 'parameterName;parameterValue'. - p_list = [[p_name + ';' + str(v) for v in p] for p_name, p in - parameters['parameters'].items()] - # Permutations of all lists. Generates all possible - # combination of elements between lists. - p_list = [list(item) for item in list(product(*p_list))] - # Creates a list of dictionaries, containing all - # combinations of given parameters - p_list = [dict([item.split(';') for item in p]) for p in p_list] - - except TypeError: - raise TypeError('All parameters for base_classifier must be list') - - - # Returns non-string values back to it's normal self - for d in p_list: - for (k, v) in d.items(): - - try: - d[k] = literal_eval(v) - except ValueError: - pass - - parameters['parameters'] = p_list - - - # No need to cross-validate when there is just one value per parameter - if all(not isinstance(p, list) or len(p) == 1 for _, p in parameters.items()): - # Pop lonely values out of list - for p_name, p in parameters.items(): - if isinstance(p, list): - parameters[p_name] = p[0] - - else: - # Convert non-list values to lists - for p_name, p in parameters.items(): - if not isinstance(p, list) and not isinstance(p, dict): - parameters[p_name] = [p] - - - - def _get_optimal_estimator(self, train_inputs, train_outputs, classifier, parameters): - - """ - Perform cross-validation over one dataset and configuration. - - Each configuration consists of one classifier and none, one or - multiple hyper-parameters, that, in turn, can contain one or - multiple values used to optimize the resulting model. 
- - At the end of cross-validation phase, the model with the - especific combination of values from the hyper-parameters - that achieved the best metrics from all the combinations - will remain. - - Parameters - ---------- - - train_inputs: {array-like, sparse-matrix}, shape (n_samples, n_features) - vector of features for each sample for this dataset. - - train_outputs: array-like, shape (n_samples) - Target vector relative to train_inputs. - - classifier: object - Class implementing a mathematical model able to be trained - and to perform predictions over given datasets. - - parameters: dictionary - Dictionary containing parameters to optimize as keys, - and the list of values that we want to compare as values. - - Returns - ------- - - optimal: GridSearchCV object or classifier object - An already fitted model of the given classifier, - with the best found parameters after cross-validation. - If cross-validation is not needed, it will return the - classifier model already trained. - """ - - - # No need to cross-validate when there is just one value per parameter - if all(not isinstance(p, list) for k, p in parameters.items()): - - optimal = classifier(**parameters) - - start = time() - optimal.fit(train_inputs, train_outputs) - elapsed = time() - start - - optimal.refit_time_ = elapsed - return optimal - - - try: - module = __import__("orca_python").metrics - metric = getattr(module, self.general_conf['cv_metric'].lower().strip()) - - except AttributeError: - - if not isinstance(self.general_conf['cv_metric'], str): - raise AttributeError("cv_metric must be string") - - raise AttributeError("No metric named '%s' implemented" - % self.general_conf['cv_metric'].strip().lower()) - - - # Making custom metrics compatible with sklearn - gib = module.greater_is_better(self.general_conf['cv_metric'].lower().strip()) - scoring_function = make_scorer(metric, greater_is_better=gib) - - # Creating object to split train data for cross-validation - # This will make GridSearch have a pseudo-random beheaviour - skf = StratifiedKFold(n_splits=self.general_conf['hyperparam_cv_nfolds'], - shuffle=True, random_state=np.random.get_state()[1][0]) - - # Performing cross-validation phase - optimal = GridSearchCV(estimator=classifier(), param_grid=parameters, scoring=scoring_function, - n_jobs=self.general_conf['jobs'], cv=skf) - - optimal.fit(train_inputs, train_outputs) - - return optimal - - - - - def write_report(self): - - """ - Saves summarized information about experiment - through Results class. - """ - - if self.verbose: - print("\nSaving Results...") - - # Names of each metric used (plus computational times) - metrics_names = [x.strip().lower() for x in self.general_conf['metrics']] \ - + ["cv_time", "time"] - - # Saving results through Results class - self._results.save_summaries(metrics_names) - + # Saving results through Results class + self._results.save_summaries(metrics_names) ########################## @@ -576,136 +583,128 @@ def write_report(self): def check_packages_version(): - - """ - Checks if minimum version of packages used by this - framework are installed. - """ - - - print("Checking packages version...") - - print("NumPy...", end=" ") - if parse_version(get_distribution("numpy").version) < parse_version("1.15.2"): - print("OUTDATED. Upgrade to 1.15.2 or newer") - else: - print("OK") - - print("Pandas...", end=" ") - if parse_version(get_distribution("pandas").version) < parse_version("0.23.4"): - print("OUTDATED. 
Upgrade to 0.23.4 or newer") - else: - print("OK") - - print("Sacred...", end=" ") - if parse_version(get_distribution("sacred").version) < parse_version("0.7.3"): - print("OUTDATED. Upgrade to 0.7.3 or newer") - else: - print("OK") - - print("Scikit-Learn...", end=" ") - if parse_version(get_distribution("scikit-learn").version) < parse_version("0.20.0"): - print("OUTDATED. Upgrade to 0.20.0 or newer") - else: - print("OK") - - print("SciPy...", end=" ") - if parse_version(get_distribution("scipy").version) < parse_version("1.1.0"): - print("OUTDATED. Upgrade to 1.1.0 or newer") - else: - print("OK") - + """ + Checks if minimum version of packages used by this + framework are installed. + """ + + print("Checking packages version...") + + print("NumPy...", end=" ") + if parse_version(get_distribution("numpy").version) < parse_version("1.15.2"): + print("OUTDATED. Upgrade to 1.15.2 or newer") + else: + print("OK") + + print("Pandas...", end=" ") + if parse_version(get_distribution("pandas").version) < parse_version("0.23.4"): + print("OUTDATED. Upgrade to 0.23.4 or newer") + else: + print("OK") + + print("Sacred...", end=" ") + if parse_version(get_distribution("sacred").version) < parse_version("0.7.3"): + print("OUTDATED. Upgrade to 0.7.3 or newer") + else: + print("OK") + + print("Scikit-Learn...", end=" ") + if parse_version(get_distribution("scikit-learn").version) < parse_version( + "0.20.0" + ): + print("OUTDATED. Upgrade to 0.20.0 or newer") + else: + print("OK") + + print("SciPy...", end=" ") + if parse_version(get_distribution("scipy").version) < parse_version("1.1.0"): + print("OUTDATED. Upgrade to 1.1.0 or newer") + else: + print("OK") def load_classifier(classifier_path, params=None): + """ + Loads and returns a classifier. - """ - Loads and returns a classifier. - - Parameters - ---------- + Parameters + ---------- - classifier_path: string - Package path where the classifier class is located in. - That module can be local if the classifier is built inside the - framework, or relative to scikit-learn package. + classifier_path: string + Package path where the classifier class is located in. + That module can be local if the classifier is built inside the + framework, or relative to scikit-learn package. - params: dictionary - Parameters to initialize the classifier with. Used when loading - a classifiers inside of an ensemble algorithm (base_classifier) + params: dictionary + Parameters to initialize the classifier with. Used when loading + a classifiers inside of an ensemble algorithm (base_classifier) - Returns - ------- + Returns + ------- - classifier: object - Returns a loaded classifier, either from an scikit-learn - module, or from a module of this framework. - Depending if hyper-parameters are specified, the object will be - instantiated or not. + classifier: object + Returns a loaded classifier, either from an scikit-learn + module, or from a module of this framework. + Depending if hyper-parameters are specified, the object will be + instantiated or not. 
- """ + """ - # Path to framework local classifier - if (len(classifier_path.split('.')) == 1): - classifier = __import__(classifier_path) - classifier = getattr(classifier, classifier_path) + # Path to framework local classifier + if len(classifier_path.split(".")) == 1: + classifier = __import__(classifier_path) + classifier = getattr(classifier, classifier_path) - # Path to Scikit-Learn classifier - else: + # Path to Scikit-Learn classifier + else: + classifier = __import__(classifier_path.rsplit(".", 1)[0], fromlist="None") + classifier = getattr(classifier, classifier_path.rsplit(".", 1)[1]) - classifier = __import__(classifier_path.rsplit('.', 1)[0], fromlist="None") - classifier = getattr(classifier, classifier_path.rsplit('.', 1)[1]) - - # Instancing meta-classifier with given parameters - if params is not None: - classifier = classifier(**params) - - return classifier + # Instancing meta-classifier with given parameters + if params is not None: + classifier = classifier(**params) + return classifier def check_for_random_state(classifier): + """ + Checks if classifiers has an attribute named random_state - """ - Checks if classifiers has an attribute named random_state - - Parameters - ---------- - classifier: object - Instance of an sklearn compatible classifier + Parameters + ---------- + classifier: object + Instance of an sklearn compatible classifier - Returns - ------- - boolean - """ + Returns + ------- + boolean + """ - try: - - load_classifier(classifier)().random_state - return True - - except AttributeError: - return False + try: + load_classifier(classifier)().random_state + return True + except AttributeError: + return False def get_key(key): - - """ - Checks if the key of a dict can be converted to int, - if not, returns the key as is. - - Parameters - ---------- - value: string - - Returns - ------- - int or string - """ - - try: - return int(key) - except ValueError: - return key + """ + Checks if the key of a dict can be converted to int, + if not, returns the key as is. + + Parameters + ---------- + value: string + + Returns + ------- + int or string + """ + + try: + return int(key) + except ValueError: + return key