Commit
genetic on the way, initial population generation OK
Paco Duhard-jourdan committed Apr 14, 2021
1 parent c190b9d commit 90cc3ca
Showing 76 changed files with 4,003 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
    "python.pythonPath": "~/.brew/Cellar/python@3.8/3.8.8_1/bin/python3.8",
    "python.linting.pylintEnabled": false,
    "python.linting.flake8Enabled": true,
    "python.linting.enabled": true
}
28 changes: 28 additions & 0 deletions GAModel.py
@@ -0,0 +1,28 @@
import Protodeep as ptd
from random import randrange


class GAModel():

    def __init__(self, constraints, input_shape,
                 metrics=['categorical_accuracy'],
                 loss='BinaryCrossentropy', optimizer='Adam'):
        i = ptd.layers.Input(input_shape)()
        out = i
        for c in constraints:
            # Draw each hyperparameter once so the printed config matches
            # the layer actually built (separate draws would not).
            units = (c['unit_range'][0] if len(c['unit_range']) == 1
                     else randrange(c['unit_range'][0], c['unit_range'][1]))
            activation = c['fas'][randrange(len(c['fas']))]
            initializer = c['initializers'][randrange(len(c['initializers']))]
            regularizer = c['regularizers'][randrange(len(c['regularizers']))]
            print({'units': units,
                   'activation': activation,
                   'kernel_initializer': initializer,
                   'kernel_regularizer': regularizer})
            out = ptd.layers.Dense(
                units=units,
                activation=activation,
                kernel_initializer=initializer,
                kernel_regularizer=regularizer
            )(out)
        self.model = ptd.model.Model(inputs=i, outputs=out)
        self.model.compile(input_shape, metrics=metrics, loss=loss,
                           optimizer=optimizer)
        self.model.summary()
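
A minimal standalone usage sketch, assuming the Protodeep API exactly as used above; the single hidden layer and the (8,) input shape are invented for illustration:

from GAModel import GAModel

constraints = [
    {'unit_range': [20, 80],          # hidden layer: random width in [20, 80)
     'fas': ['Relu', 'Tanh'],
     'initializers': ['GlorotUniform'],
     'regularizers': ['L2']},
    {'unit_range': [2],               # output layer: fixed width
     'fas': ['Softmax'],
     'initializers': ['GlorotUniform'],
     'regularizers': [None]},
]
entity = GAModel(constraints, input_shape=(8,))  # prints each sampled layer config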
52 changes: 52 additions & 0 deletions Genetic.py
@@ -0,0 +1,52 @@
# import Protodeep as ptd
from GAModel import GAModel
"""
Expected constraints format, one dict per layer (keys as consumed by GAModel):

[{
    'unit_range': [40, 60],        # [min, max) units, or [n] for a fixed width
    'fas': ['relu', ...],          # candidate activation functions
    'initializers': ['random', ...],
    'regularizers': ['', ...],
    # 'b_reg': ['', ...]
    # 'out_reg': ['', ...]
    # 'use_bias': boolean
}, {}, {}]
"""


class Genetic():

    def __init__(self, constraints, dataset, population_size=20,
                 mutation_rate=0.05, generation=10):
        self.constraints = constraints
        self.dataset = dataset
        self.input_shape = dataset.features.shape[1:]
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.generation = generation
        self.init_pop()

    # def new_rand_entity(self, constraints)
    def init_pop(self):
        self.population = [
            GAModel(self.constraints, self.input_shape)
            for _ in range(self.population_size)
        ]
        quit()  # development stop: only initial population generation works so far

    def evaluate(self, entity):
        pass

    def fit_pop(self):
        pass
        # for p in self.population:
        #     p.fitness = score(p)

    def cross_pop(self):
        pass

    def mutate_pop(self):
        pass


if __name__ == '__main__':
    print('hello')
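
The evolution loop itself is not in this commit; purely as a speculative sketch, one way the stubs above might eventually compose (run and crossover are hypothetical names, and truncation selection is an assumption, not the author's stated design):

from random import sample

def run(self):
    # Hypothetical generation loop; none of this exists in the commit yet.
    for _ in range(self.generation):
        for entity in self.population:
            entity.fitness = self.evaluate(entity)       # e.g. validation score
        ranked = sorted(self.population,
                        key=lambda e: e.fitness, reverse=True)
        survivors = ranked[:self.population_size // 2]   # truncation selection
        children = []
        while len(survivors) + len(children) < self.population_size:
            a, b = sample(survivors, 2)                  # two distinct parents
            children.append(self.crossover(a, b))        # hypothetical helper
        self.population = survivors + children
        self.mutate_pop()                                # perturb with mutation_rate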
22 changes: 22 additions & 0 deletions Makefile
@@ -0,0 +1,22 @@

.PHONY: help setup test run

.DEFAULT_GOAL = help
UNAME := $(shell uname)

ifeq ($(UNAME), Darwin)
PYTHON = ~/.brew/Cellar/python@3.8/3.8.8_1/bin/python3.8
else
PYTHON = python
endif

LIB = protodeep

help:
	@echo ---------------HELP-----------------
	@echo To setup the project and build packages type make setup
	@echo ------------------------------------

setup:
	$(MAKE) setup -C $(LIB)

35 changes: 35 additions & 0 deletions Preprocessing/Split.py
@@ -0,0 +1,35 @@
import numpy as np


class Split:

    @staticmethod
    def train_test_split(x, y, test_size=0.2, train_size=None,
                         seed=None, shuffle=True):
        # train_size is accepted for API symmetry but not used yet.
        x = np.array(x)
        y = np.array(y)
        if shuffle:
            # Shuffle x and y with the same permutation: either reseed
            # before each shuffle, or replay the saved RNG state.
            if seed is not None:
                np.random.seed(seed)
            else:
                rng_state = np.random.get_state()
            np.random.shuffle(x)
            if seed is not None:
                np.random.seed(seed)
            else:
                np.random.set_state(rng_state)
            np.random.shuffle(y)
        split = int((1 - test_size) * len(x))
        return ((x[:split], y[:split]), (x[split:], y[split:]))

    @staticmethod
    def time_series_split(features, targets, ssize=10):
        # Slide a window of ssize steps over the series; each window of
        # features predicts the first target value right after it.
        wfeatures = []
        wtargets = []
        for i in range(len(targets) - ssize):
            wfeatures.append(features[i:i+ssize])
            wtargets.append(targets[i+ssize][0])
        wfeatures = np.array(wfeatures).astype(np.float32)
        wtargets = np.array(wtargets).astype(np.float32)
        return wfeatures, wtargets
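
A quick usage sketch (shapes invented for illustration) showing that the paired shuffle keeps x and y aligned and how the windowing reshapes a series:

import numpy as np
from Preprocessing.Split import Split

x = np.arange(10).reshape(10, 1)                 # 10 samples, 1 feature
y = np.arange(10).reshape(10, 1)                 # targets aligned with x
((x_tr, y_tr), (x_te, y_te)) = Split.train_test_split(x, y, test_size=0.2,
                                                      seed=42)
assert (x_tr == y_tr).all()                      # same permutation on both

windows, labels = Split.time_series_split(x, y, ssize=3)
print(windows.shape, labels.shape)               # (7, 3, 1) and (7,)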
569 changes: 569 additions & 0 deletions data.csv

Large diffs are not rendered by default.

149 changes: 149 additions & 0 deletions dataset.py
@@ -0,0 +1,149 @@
import numpy as np
import matplotlib.pyplot as plt  # used by the commented-out preview code below
# from numba import njit

from fstat import Fstat


def time_series_split(features, targets, ssize=10):
    wfeatures = []
    wtargets = []
    for i in range(len(targets) - ssize):
        wfeatures.append(features[i:i+ssize])
        wtargets.append(targets[i+ssize][0])
    wfeatures = np.array(wfeatures).astype(np.float32)
    wtargets = np.array(wtargets).astype(np.float32)
    return wfeatures, wtargets


def parse_btc(file_name='BTCUSD_day.csv'):
    with open(file_name, 'r') as infile:
        lines = [line for line in infile.read().split('\n') if len(line) > 1]
    lines.pop(0)  # drop the header row before sizing the arrays
    features = np.empty((len(lines), 4))
    targets = np.empty((len(lines), 1))
    for i, line in enumerate(lines):
        sline = line.split(",")[3:-1]
        targets[i] = float(sline[-1])
        features[i] = np.array(sline, dtype=float)
    return features, targets  # targets is already shaped (n, 1)
    # return time_series_split(features, targets, ssize=10)


def parse_csv(file_name):
    with open(file_name, "r") as fd:
        lines = [line for line in fd.read().split() if len(line) > 1]
    features = np.empty((len(lines), 30))
    targets = np.empty((len(lines), 2))
    for i, line in enumerate(lines):
        sline = line.split(",")[1:]  # drop the id column
        target = sline.pop(0)
        targets[i] = [1, 0] if target == "M" else [0, 1]  # one-hot M/B label
        features[i] = np.array(sline)
    return (features.astype(float), targets.astype(float))


def parse_mnist_csv(file_name):
    with open(file_name, "r") as fd:
        lines = fd.read().split()
    lines.pop(0)  # drop the header row
    features = np.empty((len(lines), 28, 28, 1))
    targets = np.zeros((len(lines), 10))
    for i, line in enumerate(lines):
        sline = line.split(",")
        target = int(sline.pop(-1))
        targets[i][target] = 1  # one-hot digit label
        features[i] = np.array(sline).reshape((28, 28, 1))
    # plt.imshow(features[0], cmap=plt.get_cmap('gray'))
    # plt.show()
    return (features.astype(float) / 255., targets.astype(float))

# https://medium.com/@pavisj/convolutions-and-backpropagations-46026a8f5d2c


seed = 303
epsilon = 1e-8


# @njit
def _standardize(features, features_stat):
    for feature in features:
        for i, f in enumerate(feature):
            fs = features_stat[i]
            feature[i] = (f - fs.mean) / (fs.std + epsilon)


class Dataset:

    test_features = None
    test_targets = None

    def split_dataset(self, test_split):
        # Reseed before each shuffle so features and targets receive
        # the same permutation.
        np.random.seed(seed)
        np.random.shuffle(self.features)
        np.random.seed(seed)
        np.random.shuffle(self.targets)
        split = int((1 - test_split) * len(self.features))
        self.test_features = self.features[split:]
        self.test_targets = self.targets[split:]
        self.features = self.features[:split]
        self.targets = self.targets[:split]

    def __init__(self, file_name, test_split=0.0, scale=True):
        if test_split > 1:
            test_split = 1
        elif test_split < 0:
            test_split = 0
        if 'mnist' in file_name:
            self.features, self.targets = parse_mnist_csv(file_name)
        elif 'BTC' in file_name:
            self.features, self.targets = parse_btc()
        else:
            self.features, self.targets = parse_csv(file_name)
        self.features_stat = [Fstat(feature) for feature in self.features.T]
        if scale:
            self.standardize()
        if test_split > 0:  # every dataset is split once, here
            self.split_dataset(test_split)
        # plt.imshow(self.features[0], cmap=plt.get_cmap('gray'))
        # plt.show()

    def normalize(self):
        for feature in self.features:
            for i, f in enumerate(feature):
                fs = self.features_stat[i]
                feature[i] = (f - fs.min) / (fs.max - fs.min)

    def standardize(self):
        # _standardize(self.features, self.features_stat)
        for feature in self.features:
            for i, f in enumerate(feature):
                fs = self.features_stat[i]
                feature[i] = (f - fs.mean) / (fs.std + epsilon)
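
As a sanity check on the standardization formula, a small sketch (values invented) using Fstat directly:

import numpy as np
from fstat import Fstat

col = np.array([1.0, 2.0, 3.0, 4.0])
fs = Fstat(col)                          # mean = 2.5, std ~= 1.118
z = (col - fs.mean) / (fs.std + 1e-8)    # same per-column formula as standardize
print(z.mean(), z.std())                 # ~0.0 and ~1.0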
10 changes: 10 additions & 0 deletions fstat.py
@@ -0,0 +1,10 @@
import numpy as np


class Fstat:

    def __init__(self, feature):
        self.mean = np.mean(feature)  # sum(feature) / len(feature)
        self.std = np.std(feature)  # dslr :D
        self.min = np.min(feature)
        self.max = np.max(feature)
41 changes: 41 additions & 0 deletions main_genetic.py
@@ -0,0 +1,41 @@
from dataset import Dataset
from scalers.StandardScaler import StandardScaler
from Preprocessing.Split import Split
from Genetic import Genetic
import Protodeep as pdt

if __name__ == "__main__":
    dataset = Dataset('data.csv', 0.2)

    scaler = StandardScaler().fit(dataset.features)
    dataset.features = scaler.transform(dataset.features)
    scaler.save()

    ((x_train, y_train), (x_test, y_test)) = Split.train_test_split(
        dataset.features, dataset.targets)

    print(pdt.activations.__all__[:-1])
    gen = Genetic(
        constraints=[
            {
                'unit_range': [20, 80],
                'fas': ['Linear', 'Relu', 'Sigmoid', 'Softmax', 'Tanh'],
                'initializers': ['GlorotNormal', 'GlorotUniform', 'HeNormal',
                                 'RandomNormal', 'Zeros'],
                'regularizers': ['L1', 'L2', 'L1L2']
            },
            {
                'unit_range': [10, 30],
                'fas': ['Linear', 'Relu', 'Sigmoid', 'Softmax', 'Tanh'],
                'initializers': ['GlorotNormal', 'GlorotUniform', 'HeNormal',
                                 'RandomNormal', 'Zeros'],
                'regularizers': ['L1', 'L2', 'L1L2']
            },
            {
                'unit_range': [2],
                'fas': ['Softmax'],
                'initializers': ['GlorotNormal', 'GlorotUniform', 'HeNormal',
                                 'RandomNormal', 'Zeros'],
                'regularizers': [None]
            }
        ],
        dataset=dataset
    )
