diff --git a/lab1/doc/lab1_report.pdf b/lab1/doc/lab1_report.pdf
new file mode 100644
index 0000000..786a246
Binary files /dev/null and b/lab1/doc/lab1_report.pdf differ
diff --git a/lab1/src/NN.py b/lab1/src/NN.py
new file mode 100644
index 0000000..59bf990
--- /dev/null
+++ b/lab1/src/NN.py
@@ -0,0 +1,111 @@
+import numpy as np
+from datetime import datetime
+
+
+def sigmoid_derivative(activation):
+    # Derivative of the logistic sigmoid expressed through its output:
+    # if a = sigmoid(z), then da/dz = a * (1 - a).
+    return activation * (1 - activation)
+
+
+def shuffle(a, b):
+    # Apply the same random permutation to data and labels.
+    p = np.random.permutation(len(a))
+    return a[p], b[p]
+
+
+def logistic_regression(arg):
+    # Logistic sigmoid activation.
+    return 1 / (1 + np.exp(-arg))
+
+
+def softmax(arg):
+    # Shift by the maximum before exponentiating for numerical stability.
+    res = np.exp(arg - arg.max())
+    return res / res.sum()
+
+
+class NNetwork:
+    input_size = 28 * 28
+    output_size = 10
+
+    def __init__(self, epochs, cross_entropy, learn_rate, hidden_size):
+        self.epochs = epochs
+        self.cross_entropy_min = cross_entropy
+        self.learn_rate = learn_rate
+        self.hidden_size = hidden_size
+        self.input_layer = np.array([])
+        self.hidden_layer = np.zeros(hidden_size)
+        self.output_layer = np.array([])
+        self.output_layer_expected = np.array([])
+        self.weights_hidden = np.array([])
+        self.weights_out = np.array([])
+
+    def reset_weights(self):
+        # Initialize both weight matrices uniformly in [-1, 1).
+        self.weights_hidden = 2 * np.random.rand(self.input_size, self.hidden_size) - 1
+        self.weights_out = 2 * np.random.rand(self.hidden_size, self.output_size) - 1
+
+    def calc_hidden(self):
+        self.hidden_layer = logistic_regression(np.dot(self.input_layer, self.weights_hidden))
+
+    def calc_output(self):
+        self.calc_hidden()
+        self.output_layer = softmax(np.dot(self.hidden_layer, self.weights_out))
+
+    def correct_weights(self):
+        # Backpropagation for a softmax output with cross-entropy loss:
+        # the output-layer error reduces to (prediction - target).
+        delta2 = self.output_layer - self.output_layer_expected
+        gradient_out = np.outer(self.hidden_layer, delta2)
+        # Propagate the error back through the output weights and the sigmoid.
+        delta1 = np.dot(self.weights_out, delta2) * sigmoid_derivative(self.hidden_layer)
+        gradient_hidden = np.outer(self.input_layer, delta1)
+
+        self.weights_hidden -= self.learn_rate * gradient_hidden
+        self.weights_out -= self.learn_rate * gradient_out
+
+    def set_input(self, input_layer, label):
+        self.input_layer = input_layer
+        self.output_layer_expected = label
+
+    def calc_cross_entropy(self, data, labels):
+        error = 0.0
+        for i in range(len(data)):
+            self.set_input(data[i] / 255, labels[i])
+            self.calc_output()
+            index = self.output_layer_expected.argmax()
+            # The small epsilon guards against log(0) when a predicted
+            # probability underflows to zero.
+            error -= np.log(self.output_layer[index] + 1e-12)
+        error /= len(data)
+        return error
+
+    def train(self, data, labels):
+        print(str(datetime.now()), 'Start training...')
+        for epoch in range(self.epochs):
+            correct = 0
+            data, labels = shuffle(data, labels)
+            print(str(datetime.now()), 'Shuffled data...')
+            for i in range(len(data)):
+                self.set_input(data[i] / 255, labels[i])
+                self.calc_output()
+                if self.output_layer.argmax() == self.output_layer_expected.argmax():
+                    correct += 1
+                self.correct_weights()
+            print(str(datetime.now()), 'Corrected weights...')
+            accuracy = correct / len(data)
+            cross_entropy = self.calc_cross_entropy(data, labels)
+            print(str(datetime.now()), 'Epoch:', epoch, 'Cross entropy:', cross_entropy,
+                  'Accuracy:', accuracy)
+            if cross_entropy < self.cross_entropy_min:
+                break
+
+    def test(self, data, labels):
+        correct = 0
+        for i in range(len(data)):
+            self.set_input(data[i] / 255, labels[i])
+            self.calc_output()
+            if self.output_layer.argmax() == self.output_layer_expected.argmax():
+                correct += 1
+        return correct / len(data)
diff --git a/lab1/src/Train.py b/lab1/src/Train.py
new file mode 100644
index 0000000..688c040
--- /dev/null
+++ b/lab1/src/Train.py
@@ -0,0 +1,52 @@
+import sys
+from datetime import datetime
+
+import numpy as np
+
+import get_data
+
+from NN import NNetwork
+
+
+def main(argv):
+    # Use defaults unless all four hyperparameters are given on the command
+    # line: epochs, target cross-entropy, learning rate, hidden-layer size.
+    if len(argv) != 5:
+        epochs = 10
+        cross_entropy = 0.001
+        learn_rate = 0.01
+        hidden_size = 500
+    else:
+        epochs = int(argv[1])
+        cross_entropy = float(argv[2])
+        learn_rate = float(argv[3])
+        hidden_size = int(argv[4])
+
+    N_train = 60000
+    N_test = 10000
+
+    train_images = get_data.train_images()[:N_train]
+    train_labels = get_data.train_labels()[:N_train]
+    test_images = get_data.test_images()[:N_test]
+    test_labels = get_data.test_labels()[:N_test]
+
+    # Flatten each 28x28 image into a 784-element vector.
+    X_train = train_images.reshape(len(train_images), 784)
+    X_test = test_images.reshape(len(test_images), 784)
+    # One-hot encode the labels (10 digit classes).
+    Y_train = np.eye(10)[train_labels]
+    Y_test = np.eye(10)[test_labels]
+
+    network = NNetwork(epochs, cross_entropy, learn_rate, hidden_size)
+    network.reset_weights()
+    print(str(datetime.now()), 'Initialization successful, training network...')
+    network.train(X_train, Y_train)
+    print(str(datetime.now()), 'Training ended')
+    train_result = network.test(X_train, Y_train)
+    print(str(datetime.now()), 'Training accuracy:', train_result)
+    test_result = network.test(X_test, Y_test)
+    print(str(datetime.now()), 'Test accuracy:', test_result)
+
+
+if __name__ == "__main__":
+    main(sys.argv)
diff --git a/lab1/src/get_data.py b/lab1/src/get_data.py
new file mode 100644
index 0000000..64a46c4
--- /dev/null
+++ b/lab1/src/get_data.py
@@ -0,0 +1,48 @@
+from urllib.parse import urljoin
+from urllib.request import urlretrieve
+import tempfile
+import os
+import gzip
+import struct
+import numpy as np
+import array
+
+data_url = 'http://yann.lecun.com/exdb/mnist/'
+
+
+def prepare_file(file_name):
+    url = urljoin(data_url, file_name)
+    local_dir = tempfile.gettempdir()
+    local_file = os.path.join(local_dir, file_name)
+
+    # Download only once; reuse the cached copy on later calls.
+    if not os.path.exists(local_file):
+        urlretrieve(url, local_file)
+
+    with gzip.open(local_file, 'rb') as f:
+        # IDX header: two zero bytes, a type code, and the number of
+        # dimensions, followed by one big-endian uint32 per dimension size.
+        types = {0x08: 'B', 0x09: 'b', 0x0b: 'h', 0x0c: 'i', 0x0d: 'f', 0x0e: 'd'}
+        head = f.read(4)
+        zeros, data_type, num_dimensions = struct.unpack('>HBB', head)
+        data_type = types[data_type]
+        dimension_sizes = struct.unpack('>' + 'I' * num_dimensions, f.read(4 * num_dimensions))
+        data = array.array(data_type, f.read())
+        data.byteswap()  # IDX stores values big-endian; swap to native order.
+        return np.array(data).reshape(dimension_sizes)
+
+
+def train_images():
+    return prepare_file('train-images-idx3-ubyte.gz')
+
+
+def train_labels():
+    return prepare_file('train-labels-idx1-ubyte.gz')
+
+
+def test_images():
+    return prepare_file('t10k-images-idx3-ubyte.gz')
+
+
+def test_labels():
+    return prepare_file('t10k-labels-idx1-ubyte.gz')