Commit
genetic on the way, initial population generation OK
Paco Duhard-jourdan committed Apr 14, 2021
1 parent c190b9d commit 90cc3ca
Showing 76 changed files with 4,003 additions and 0 deletions.
6 changes: 6 additions & 0 deletions .vscode/settings.json
@@ -0,0 +1,6 @@
{
    "python.pythonPath": "~/.brew/Cellar/python@3.8/3.8.8_1/bin/python3.8",
    "python.linting.pylintEnabled": false,
    "python.linting.flake8Enabled": true,
    "python.linting.enabled": true
}
28 changes: 28 additions & 0 deletions GAModel.py
@@ -0,0 +1,28 @@
import Protodeep as ptd
from random import randrange


class GAModel():

    def __init__(self, constraints, input_shape,
                 metrics=['categorical_accuracy'],
                 loss='BinaryCrossentropy', optimizer='Adam'):
        i = ptd.layers.Input(input_shape)()
        out = i
        for c in constraints:
            # Draw each hyperparameter once so the printed config matches
            # the layer actually built (separate draws would not).
            units = (c['unit_range'][0] if len(c['unit_range']) == 1
                     else randrange(c['unit_range'][0], c['unit_range'][1]))
            activation = c['fas'][randrange(len(c['fas']))]
            initializer = c['initializers'][randrange(len(c['initializers']))]
            regularizer = c['regularizers'][randrange(len(c['regularizers']))]
            print({'units': units,
                   'activation': activation,
                   'kernel_initializer': initializer,
                   'kernel_regularizer': regularizer})
            out = ptd.layers.Dense(
                units=units,
                activation=activation,
                kernel_initializer=initializer,
                kernel_regularizer=regularizer
            )(out)
        self.model = ptd.model.Model(inputs=i, outputs=out)
        self.model.compile(input_shape, metrics=metrics, loss=loss,
                           optimizer=optimizer)
        self.model.summary()
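
A minimal standalone usage sketch, assuming the Protodeep API exactly as used above; the single hidden layer and the (8,) input shape are invented for illustration:

from GAModel import GAModel

constraints = [
    {'unit_range': [20, 80],          # hidden layer: random width in [20, 80)
     'fas': ['Relu', 'Tanh'],
     'initializers': ['GlorotUniform'],
     'regularizers': ['L2']},
    {'unit_range': [2],               # output layer: fixed width
     'fas': ['Softmax'],
     'initializers': ['GlorotUniform'],
     'regularizers': [None]},
]
entity = GAModel(constraints, input_shape=(8,))  # prints each sampled layer config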
52 changes: 52 additions & 0 deletions Genetic.py
@@ -0,0 +1,52 @@
# import Protodeep as ptd
from GAModel import GAModel
"""
Expected constraints format, one dict per layer (keys as consumed by GAModel):

[{
    'unit_range': [40, 60],        # [min, max) units, or [n] for a fixed width
    'fas': ['relu', ...],          # candidate activation functions
    'initializers': ['random', ...],
    'regularizers': ['', ...],
    # 'b_reg': ['', ...]
    # 'out_reg': ['', ...]
    # 'use_bias': boolean
}, {}, {}]
"""


class Genetic():

    def __init__(self, constraints, dataset, population_size=20,
                 mutation_rate=0.05, generation=10):
        self.constraints = constraints
        self.dataset = dataset
        self.input_shape = dataset.features.shape[1:]
        self.population_size = population_size
        self.mutation_rate = mutation_rate
        self.generation = generation
        self.init_pop()

    # def new_rand_entity(self, constraints)
    def init_pop(self):
        self.population = [
            GAModel(self.constraints, self.input_shape)
            for _ in range(self.population_size)
        ]
        quit()  # development stop: only initial population generation works so far

    def evaluate(self, entity):
        pass

    def fit_pop(self):
        pass
        # for p in self.population:
        #     p.fitness = score(p)

    def cross_pop(self):
        pass

    def mutate_pop(self):
        pass


if __name__ == '__main__':
    print('hello')
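
The evolution loop itself is not in this commit; purely as a speculative sketch, one way the stubs above might eventually compose (run and crossover are hypothetical names, and truncation selection is an assumption, not the author's stated design):

from random import sample

def run(self):
    # Hypothetical generation loop; none of this exists in the commit yet.
    for _ in range(self.generation):
        for entity in self.population:
            entity.fitness = self.evaluate(entity)       # e.g. validation score
        ranked = sorted(self.population,
                        key=lambda e: e.fitness, reverse=True)
        survivors = ranked[:self.population_size // 2]   # truncation selection
        children = []
        while len(survivors) + len(children) < self.population_size:
            a, b = sample(survivors, 2)                  # two distinct parents
            children.append(self.crossover(a, b))        # hypothetical helper
        self.population = survivors + children
        self.mutate_pop()                                # perturb with mutation_rate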
22 changes: 22 additions & 0 deletions Makefile
@@ -0,0 +1,22 @@

.PHONY: help setup test run

.DEFAULT_GOAL = help
UNAME := $(shell uname)

ifeq ($(UNAME), Darwin)
PYTHON = ~/.brew/Cellar/python@3.8/3.8.8_1/bin/python3.8
else
PYTHON = python
endif

LIB = protodeep

help:
	@echo ---------------HELP-----------------
	@echo To setup the project and build packages type make setup
	@echo ------------------------------------

setup:
	$(MAKE) setup -C $(LIB)

35 changes: 35 additions & 0 deletions Preprocessing/Split.py
@@ -0,0 +1,35 @@
import numpy as np


class Split:

    @staticmethod
    def train_test_split(x, y, test_size=0.2, train_size=None,
                         seed=None, shuffle=True):
        # train_size is accepted for API symmetry but not used yet.
        x = np.array(x)
        y = np.array(y)
        if shuffle:
            # Shuffle x and y with the same permutation: either reseed
            # before each shuffle, or replay the saved RNG state.
            if seed is not None:
                np.random.seed(seed)
            else:
                rng_state = np.random.get_state()
            np.random.shuffle(x)
            if seed is not None:
                np.random.seed(seed)
            else:
                np.random.set_state(rng_state)
            np.random.shuffle(y)
        split = int((1 - test_size) * len(x))
        return ((x[:split], y[:split]), (x[split:], y[split:]))

    @staticmethod
    def time_series_split(features, targets, ssize=10):
        # Slide a window of ssize steps over the series; each window of
        # features predicts the first target value right after it.
        wfeatures = []
        wtargets = []
        for i in range(len(targets) - ssize):
            wfeatures.append(features[i:i+ssize])
            wtargets.append(targets[i+ssize][0])
        wfeatures = np.array(wfeatures).astype(np.float32)
        wtargets = np.array(wtargets).astype(np.float32)
        return wfeatures, wtargets
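
A quick usage sketch (shapes invented for illustration) showing that the paired shuffle keeps x and y aligned and how the windowing reshapes a series:

import numpy as np
from Preprocessing.Split import Split

x = np.arange(10).reshape(10, 1)                 # 10 samples, 1 feature
y = np.arange(10).reshape(10, 1)                 # targets aligned with x
((x_tr, y_tr), (x_te, y_te)) = Split.train_test_split(x, y, test_size=0.2,
                                                      seed=42)
assert (x_tr == y_tr).all()                      # same permutation on both

windows, labels = Split.time_series_split(x, y, ssize=3)
print(windows.shape, labels.shape)               # (7, 3, 1) and (7,)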
569 changes: 569 additions & 0 deletions data.csv

Large diffs are not rendered by default.

149 changes: 149 additions & 0 deletions dataset.py
@@ -0,0 +1,149 @@
import numpy as np
import matplotlib.pyplot as plt  # used by the commented-out preview code below
# from numba import njit

from fstat import Fstat


def time_series_split(features, targets, ssize=10):
    wfeatures = []
    wtargets = []
    for i in range(len(targets) - ssize):
        wfeatures.append(features[i:i+ssize])
        wtargets.append(targets[i+ssize][0])
    wfeatures = np.array(wfeatures).astype(np.float32)
    wtargets = np.array(wtargets).astype(np.float32)
    return wfeatures, wtargets


def parse_btc(file_name='BTCUSD_day.csv'):
    with open(file_name, 'r') as infile:
        lines = [line for line in infile.read().split('\n') if len(line) > 1]
    lines.pop(0)  # drop the header row before sizing the arrays
    features = np.empty((len(lines), 4))
    targets = np.empty((len(lines), 1))
    for i, line in enumerate(lines):
        sline = line.split(",")[3:-1]
        targets[i] = float(sline[-1])
        features[i] = np.array(sline, dtype=float)
    return features, targets  # targets is already shaped (n, 1)
    # return time_series_split(features, targets, ssize=10)


def parse_csv(file_name):
    with open(file_name, "r") as fd:
        lines = [line for line in fd.read().split() if len(line) > 1]
    features = np.empty((len(lines), 30))
    targets = np.empty((len(lines), 2))
    for i, line in enumerate(lines):
        sline = line.split(",")[1:]  # drop the id column
        target = sline.pop(0)
        targets[i] = [1, 0] if target == "M" else [0, 1]  # one-hot M/B label
        features[i] = np.array(sline)
    return (features.astype(float), targets.astype(float))


def parse_mnist_csv(file_name):
    with open(file_name, "r") as fd:
        lines = fd.read().split()
    lines.pop(0)  # drop the header row
    features = np.empty((len(lines), 28, 28, 1))
    targets = np.zeros((len(lines), 10))
    for i, line in enumerate(lines):
        sline = line.split(",")
        target = int(sline.pop(-1))
        targets[i][target] = 1  # one-hot digit label
        features[i] = np.array(sline).reshape((28, 28, 1))
    # plt.imshow(features[0], cmap=plt.get_cmap('gray'))
    # plt.show()
    return (features.astype(float) / 255., targets.astype(float))

# https://medium.com/@pavisj/convolutions-and-backpropagations-46026a8f5d2c


seed = 303
epsilon = 1e-8


# @njit
def _standardize(features, features_stat):
    for feature in features:
        for i, f in enumerate(feature):
            fs = features_stat[i]
            feature[i] = (f - fs.mean) / (fs.std + epsilon)


class Dataset:

    test_features = None
    test_targets = None

    def split_dataset(self, test_split):
        # Reseed before each shuffle so features and targets receive
        # the same permutation.
        np.random.seed(seed)
        np.random.shuffle(self.features)
        np.random.seed(seed)
        np.random.shuffle(self.targets)
        split = int((1 - test_split) * len(self.features))
        self.test_features = self.features[split:]
        self.test_targets = self.targets[split:]
        self.features = self.features[:split]
        self.targets = self.targets[:split]

    def __init__(self, file_name, test_split=0.0, scale=True):
        if test_split > 1:
            test_split = 1
        elif test_split < 0:
            test_split = 0
        if 'mnist' in file_name:
            self.features, self.targets = parse_mnist_csv(file_name)
        elif 'BTC' in file_name:
            self.features, self.targets = parse_btc()
        else:
            self.features, self.targets = parse_csv(file_name)
        self.features_stat = [Fstat(feature) for feature in self.features.T]
        if scale:
            self.standardize()
        if test_split > 0:  # every dataset is split once, here
            self.split_dataset(test_split)
        # plt.imshow(self.features[0], cmap=plt.get_cmap('gray'))
        # plt.show()

    def normalize(self):
        for feature in self.features:
            for i, f in enumerate(feature):
                fs = self.features_stat[i]
                feature[i] = (f - fs.min) / (fs.max - fs.min)

    def standardize(self):
        # _standardize(self.features, self.features_stat)
        for feature in self.features:
            for i, f in enumerate(feature):
                fs = self.features_stat[i]
                feature[i] = (f - fs.mean) / (fs.std + epsilon)
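
As a sanity check on the standardization formula, a small sketch (values invented) using Fstat directly:

import numpy as np
from fstat import Fstat

col = np.array([1.0, 2.0, 3.0, 4.0])
fs = Fstat(col)                          # mean = 2.5, std ~= 1.118
z = (col - fs.mean) / (fs.std + 1e-8)    # same per-column formula as standardize
print(z.mean(), z.std())                 # ~0.0 and ~1.0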
10 changes: 10 additions & 0 deletions fstat.py
@@ -0,0 +1,10 @@
import numpy as np


class Fstat:

    def __init__(self, feature):
        self.mean = np.mean(feature)  # sum(feature) / len(feature)
        self.std = np.std(feature)  # dslr :D
        self.min = np.min(feature)
        self.max = np.max(feature)
41 changes: 41 additions & 0 deletions main_genetic.py
@@ -0,0 +1,41 @@
from dataset import Dataset
from scalers.StandardScaler import StandardScaler
from Preprocessing.Split import Split
from Genetic import Genetic
import Protodeep as pdt

if __name__ == "__main__":
    dataset = Dataset('data.csv', 0.2)

    scaler = StandardScaler().fit(dataset.features)
    dataset.features = scaler.transform(dataset.features)
    scaler.save()

    ((x_train, y_train), (x_test, y_test)) = Split.train_test_split(
        dataset.features, dataset.targets)

    print(pdt.activations.__all__[:-1])
    gen = Genetic(
        constraints=[
            {
                'unit_range': [20, 80],
                'fas': ['Linear', 'Relu', 'Sigmoid', 'Softmax', 'Tanh'],
                'initializers': ['GlorotNormal', 'GlorotUniform', 'HeNormal',
                                 'RandomNormal', 'Zeros'],
                'regularizers': ['L1', 'L2', 'L1L2']
            },
            {
                'unit_range': [10, 30],
                'fas': ['Linear', 'Relu', 'Sigmoid', 'Softmax', 'Tanh'],
                'initializers': ['GlorotNormal', 'GlorotUniform', 'HeNormal',
                                 'RandomNormal', 'Zeros'],
                'regularizers': ['L1', 'L2', 'L1L2']
            },
            {
                'unit_range': [2],
                'fas': ['Softmax'],
                'initializers': ['GlorotNormal', 'GlorotUniform', 'HeNormal',
                                 'RandomNormal', 'Zeros'],
                'regularizers': [None]
            }
        ],
        dataset=dataset
    )
