import copy
import multiprocessing
import os
import argparse
import pickle
import sys
from typing import Tuple, List, Dict, Callable, Optional, Union
import mongoengine
import numpy as np
import atexit
import datetime
from gym.envs.classic_control import CartPoleEnv
from globalvalues import gv
import draw
import models.trainingrun as tg
from agent import Agent
from lineFollowingEnvironment import LineFollowingEnv
from lineFollowingEnvironment2 import LineFollowingEnv2
from mongoengine import Document, FileField, ListField, StringField, BinaryField, IntField, DateTimeField, FloatField, \
ReferenceField, disconnect, DictField
import json


class Experiment(Document):
"""
    Creates and manages a training environment with an SNN RL algorithm.
"""
training = ReferenceField(tg.Trainingrun)
parameterdump = StringField()
time_start = DateTimeField()
time_end = DateTimeField()
time_elapsed = FloatField() # in s
diagrams = ListField(FileField())
cycle_i = IntField(default=0)
totalCycleCounter = IntField(default=0)
episode = IntField(default=0)
return_per_episode_sum = ListField(FloatField()) # per episode
log_Δw = ListField(FloatField()) # per episode
log_m = ListField(FloatField()) # per episode
epslength = ListField(IntField())
    episodedata = ListField(ReferenceField(tg.Episode))  # stores references to all episodes
workerdata = DictField()
def __init__(self, *args, **values):
super().__init__(*args, **values)
gv.init()
self.printedbias = False
self.env = None
        self.penalty = -8  # penalty added when an episode ends in failure
self.errsig_contingent = [0]
self.return_per_episode_sum = []
self.totalCycleCounter = -1 # will be increased at the beginning of the cycle
self.log_Δw = []
self.log_m = []
self.rewards: List = [] # reward of last episode
self.errsigs = None
self.utils = None
        self.agent: Optional[Agent] = None
        self.lastweights: np.ndarray = 0  # initialized with 0 so that it can be used in computations
self.epslength = [] # stores the number of cycles for each episode
    def cycle(self, observation_in: np.array) -> Tuple[bool, np.array]:
        """Computes one brain frame, applies the action and simulates the environment for one frame.
        :param observation_in: the last observation
        :return: (done, observation)
        """
if gv.render:
self.env.render()
self.totalCycleCounter += 1
# feed observations into brain
action, neural_activity = self.agent.actor.cycle(time=gv.cycle_length * self.cycle_i,
observation_in=observation_in)
# simulate environment
observation, reward, done, info = self.env.step(action)
reward_internal = reward
# distance from ideal position
# if isinstance(self.env.env, CartPoleEnv):
# reward_internal = 50 * np.math.cos(observation[2])
if not self.printedbias:
print("Bias: " + str(reward + self.penalty))
self.printedbias = True
        try:  # try/except because env.env does not exist for every environment
            if done and not (isinstance(self.env.env, CartPoleEnv) and self.cycle_i >= 200):
                # add a penalty for CartPole when it failed (surviving 200 cycles counts as success)
                reward_internal += self.penalty
        except AttributeError:
            pass
err_signal, util = self.agent.critic.tick(state=observation, new_rewards=[reward_internal])
# store unedited
if not gv.demo:
self.errsigs[self.episode, self.cycle_i] = err_signal
# self.utils[self.episode, self.totalCycleCounter] = util
self.rewards.append(reward)
# clamp utility
if gv.max_util_integral != float("inf"):
if abs(self.errsig_contingent[-1] + err_signal) >= gv.max_util_integral:
err_signal = 0
self.errsig_contingent.append(self.errsig_contingent[-1] + err_signal)
# gv.outactivity["utility"].append(utility)
if gv.structural_plasticity:
self.agent.actor.connectome.update_structural_plasticity()
# Set reward signal for left and right network
self.agent.actor.release_neurotransmitter(err_signal * gv.errsig_factor)
self.agent.end_cycle(self.cycle_i)
return done, observation
def simulate_episode(self) -> bool:
"""Simulate one episode
:return: True if everything went okay. False if training needs to be canceled
"""
if self.episode > 0:
self.agent.prepare_episode()
observation = self.env.reset()
self.rewards.clear()
for self.cycle_i in range(gv.max_cycles):
# if failed, break early
done, observation = self.cycle(observation_in=observation)
if done:
break
# extra simulation time to apply changes in last cycle before resetting
self.agent.post_episode()
self.epslength.append(self.cycle_i)
return self.post_episode()
def post_episode(self) -> bool:
"""
:return: True if everything went okay. False if training needs to be canceled
"""
eps: tg.Episode = tg.Episode()
eps.rewards = self.rewards
if gv.save_to_db:
eps.episode = self.episode
            if len(self.agent.actor.log_m) > 0:
eps.neuromodulator = self.agent.actor.log_m
self.log_m.append(np.average(eps.neuromodulator))
# extract the last weights
        try:
            weights = np.array(list(self.agent.get_weights().values()))
        except AttributeError:  # get_weights did not return a dict
            weights = self.agent.get_weights()
# check if no weight changed -> Early termination
Δw: float = np.sum(weights - self.lastweights)
self.log_Δw.append(Δw)
if gv.allow_early_termination and self.episode > 50 and -0.00001 < Δw < 0.00001:
self.early_termination(eps, weights)
return False
self.lastweights = weights
self.return_per_episode_sum.append(np.sum(self.rewards))
if gv.save_to_db:
            # save only at the end of the training
            if self.episode > 0 and self.episode % (gv.num_episodes - 1) == 0:
self.save_episode(eps, weights)
self.save()
if not gv.demo:
self.agent.end_episode(self.episode)
return True
def early_termination(self, eps, weights):
print("\nEarly termination because Δw=0.")
# todo log a message in the db
if gv.save_to_db:
#eps.activation = list(np.average(np.array(self.agent.actor.log_activation), axis=0))
eps.neuromodulator = self.agent.actor.log_m
self.save_episode(eps, weights)
try:
self.agent.actor.connectome.drawspikes()
except AttributeError:
pass
self.save()
def save_episode(self, eps, weights):
eps.weights_human = weights.tolist()
eps.weights = pickle.dumps(weights)
eps.save()
self.episodedata.append(eps.id)
def train(self):
"""Trains the agent for given numbers"""
# extend on existing recordings
self.errsigs = np.full((self.episode + gv.num_episodes, gv.max_cycles), np.nan)
        for episode_training in range(gv.num_episodes):
            # episode_training=0
            # while self.totalCycleCounter < gv.max_cycles:
            episode_training += 1  # 1-based count for the time-per-episode estimate below
# simulate
if not self.simulate_episode():
break
# "CartPole-v0 defines solving as getting average return of 195.0 over 100 consecutive trials."
            last100return = np.average(self.return_per_episode_sum[max(0, self.episode - 100):self.episode + 1])
# time/performance evaluation
tpe = (datetime.datetime.utcnow() - self.time_start) / episode_training
# tpc = (datetime.datetime.utcnow() - self.time_start) / self.totalCycleCounter
# eta = tpc * (gv.max_cycles - self.totalCycleCounter)
eta = tpe * (gv.num_episodes - episode_training)
overwrite = "\r" if self.episode > 0 else ""
sys.stdout.write(
f"{overwrite}{self.episode * 100 / gv.num_episodes:3.3f}% (Episode: {self.episode}, Cycle:{self.totalCycleCounter}) ETA {eta}. Avg. return: {last100return:.1f}")
sys.stdout.flush()
# plots
if gv.num_plots > 0 and gv.num_episodes > gv.num_plots and self.episode % (
gv.num_episodes // gv.num_plots) == 0:
# draw.voltage(self.agent.actor.connectome.multimeter, persp="2d")
try:
self.agent.actor.connectome.drawspikes()
except AttributeError:
pass
self.episode += 1
print(f"Cycles: {self.totalCycleCounter}")
    def drawreport(self):
        """Draws a report plot of this experiment."""
        # self.agent.critic.draw(xaxis=0, yaxis=1)
filename = f"{self.id}.png" if self.id is not None else None
        try:
            connectome = self.agent.actor.connectome.conns
        except AttributeError:  # not every actor has a connectome
            connectome = None
draw.report(utility=self.errsigs,
weights=np.array(self.agent.actor.weightlog),
returnpereps=self.return_per_episode_sum,
connections=connectome,
filename=filename,
env=self.env)
    def presetup(self):
        """Connects to the database, records the start time and dumps the hyperparameters."""
        print("Process w/ worker id " + str(multiprocessing.current_process()))
dbconnect()
self.time_start = datetime.datetime.utcnow()
if gv.save_to_db:
            self.save()  # save first to get an id
# pre-training
        def dump(obj):
            """Returns a string listing all attributes of obj."""
            f = ""
for attr in dir(obj):
if attr != "__dict__":
f += "obj.%s = %r" % (attr, getattr(obj, attr)) + "\n"
return f
self.parameterdump = dump(gv)
# dump(f, self)
# dump(f, self.agent.critic)
# register instance
self.training.instances.append(str(self.id))
if gv.save_to_db:
self.training.save()
def posttrain(self):
# stats
self.time_end = datetime.datetime.utcnow()
self.time_elapsed = (self.time_end - self.time_start).total_seconds()
if gv.save_to_db:
self.save()
if isinstance(self.env, LineFollowingEnv) or isinstance(self.env, LineFollowingEnv2):
self.drawreport()
self.env.close()
# if not gv.render:
# self.show()
def run(self, workerdata: Dict = None) -> List[float]:
"""
        Creates and trains the network.
        :param workerdata: the per-worker parameters; must contain "training" and may
            contain "configurator" plus grid-search hyperparameters
        :return: the return per episode of the training
"""
self.training = workerdata.pop("training")
self.presetup()
self.workerdata = workerdata
gv.workerdata = workerdata # not nice to add it as a global variable
# create experiment
configurator: Callable
if "configurator" in workerdata and workerdata["configurator"] is not None:
configurator = workerdata.pop("configurator")
else:
from experiments import lf_placecells
configurator = lf_placecells.configure_training
configurator(self)
# parse some gridsearch parameters to overwrite configurator
if workerdata:
for (key, value) in self.workerdata.items():
if hasattr(gv, key):
setattr(gv, key, value)
elif key == "vq_lr_int":
gv.vq_learning_scale = list([0, 10 ** -4, 10 ** -3, 10 ** -2])[int(value)]
elif key == "vq_decay_int":
gv.vq_decay = list([0, 10 ** -4, 10 ** -3, 10 ** -2])[int(value)]
else:
print("unknown gridsearch hyperparameter " + key)
# training for pole
self.train()
self.posttrain()
return self.return_per_episode_sum
    def show(self):
        """Renders one demo episode with learning and plasticity disabled."""
        global gv
gv_old = copy.deepcopy(gv)
gv.errsig_factor = 0.
gv.structural_plasticity = False
gv.render = True
gv.demo = True
self.agent.prepare_episode()
self.simulate_episode()
gv = gv_old
def runworker(dataperworker: Optional[Dict]) -> List[float]:
"""
Set up a worker (process) and run an experiment.
:param dataperworker:
:return:
"""
# redundant copy of method because the gridsearch returns validation errors
# there was a crash when db was disabled with a gridsearch pool
# this cannot be a local function bedause it will cause a crash"
return Experiment().run(dataperworker)
def gridsearch(num_processes: int, training, configurator: Callable) -> List:
"""perform a gridsearcg on the giving trainingdata """
pool = multiprocessing.Pool(num_processes)
withoutgivenvalues = filter(lambda v: "from" in v, training.gridsearch.values())
    # todo: insert the explicitly given ranges into the gridsearch (withgivenvalues is not used yet)
withgivenvalues = filter(lambda v: "range" in v, training.gridsearch.values())
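    # np.mgrid interprets a complex step as the (inclusive) number of sample points,
    # so slice(from, to, complex(steps)) yields `steps` evenly spaced values per parameter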
parameters: List[slice] = [slice(rangedetails["from"], rangedetails["to"], complex(rangedetails["steps"])) for
rangedetails in withoutgivenvalues]
rangesgridsearch: np.array = np.mgrid[parameters].reshape(len(parameters), -1).T
    # put the parameters per worker into an array
workload: List[Dict] = []
paramnameslist = list(training.gridsearch.keys())
for workerdata in rangesgridsearch:
# each gets a training reference
obj = {"training": training}
if configurator is not None:
obj["configurator"] = configurator
for paramidx, param in enumerate(workerdata):
obj[paramnameslist[paramidx]] = param
workload.append(obj)
result = pool.map(func=runworker, iterable=workload)
pool.close()
pool.join()
    if len(parameters) == 2:
        numcolumns = list(training.gridsearch.values())[0]["steps"]
        resultnp = np.array(result).reshape((-1, numcolumns))
        table = "\\begin{center}\\begin{tabular}{ | l | l | l | l | l |}\\hline\n"
        table += "num.~cells & $\\lambda =0$ (no vq) & $\\lambda =0.0001$ & $\\lambda =0.001$ & $\\lambda =0.01$"
        for i, resultitem in enumerate(result):
            if i % numcolumns == 0:
                table += "\\\\ \\hline\n"
                table += str(int(i / numcolumns))  # row name
            table += f" & {np.average(resultitem):.0f}"
        table += "\\\\ \\hline\n"
        table += "\\end{tabular}\\end{center}"
        print(table)
return result
dirname = ""
def exit_handler():
    """Removes the experiment data directory again if it stayed empty."""
    os.chdir("../../")
    global dirname
    if len(os.listdir(dirname)) == 0:
        os.rmdir(dirname)
def createexpdatadir():
"""create new directory for test results and switches to it"""
counter = 0
dirbase = "experimentdata/gsrewardsignal"
global dirname
dirname = dirbase + str(counter)
while os.path.isdir(dirname):
counter += 1
dirname = dirbase + str(counter)
os.makedirs(dirname)
os.chdir(dirname)
print(f"saving to {dirname}\n")
atexit.register(exit_handler)
def dbconnect():
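    # NOTE: the credentials and host are left blank in the repository;
    # fill in the values of your own MongoDB deployment before enabling gv.save_to_db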
mongoengine.connect(
db='snntrainings',
username='',
password='',
port=45920,
authentication_source='admin',
host=''
)
def trainingrun(configurator: Callable = None, num_processes: int = 1, gridsearchpath: str = None) -> Tuple[Union[None, Experiment], List]:
"""
    Creates an experiment and runs it.
    :param configurator: callable that configures an Experiment before training
    :param num_processes: the number of worker processes (only used for grid searches)
    :param gridsearchpath: path to a JSON file specifying the grid-search parameters
    :return: the Experiment if a single experiment was run (None for a gridsearch) and the list of results
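
    Example (with a hypothetical configurator `my_configure_training`; a sketch):
        exp, results = trainingrun(configurator=my_configure_training, num_processes=1)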
"""
training = tg.Trainingrun()
training.time_start = datetime.datetime.utcnow()
if gv.save_to_db:
dbconnect()
training.save()
print(f"💾DB Trainingrun: ObjectId(\"{training.id}\")")
disconnect()
# if gridsearch
singleexp = None
if gridsearchpath is not None:
with open(gridsearchpath, "r") as file:
training.gridsearch = json.loads(file.read())
createexpdatadir()
result = gridsearch(num_processes, training, configurator)
else:
# if not a gridsearch
createexpdatadir()
datasingleworker = {"training": training} if configurator is None else {"training": training,
"configurator": configurator}
singleexp = Experiment()
result = singleexp.run(datasingleworker)
training.time_end = datetime.datetime.utcnow()
training.time_elapsed = (training.time_end - training.time_start).total_seconds()
if gv.save_to_db:
dbconnect()
training.save()
disconnect()
print(f"{training.time_elapsed / 60:10.1f} min")
return singleexp, result
# import matplotlib.pyplot as plt  # only needed for the commented-out debug plotting below
# plt.plot(gv.outactivity["out1"], label="output 0")
# plt.plot(gv.outactivity["out2"], label="output 1")
# #plt.plot(np.array(gv.outactivity["action"])*30, label="action")
# plt.plot(gv.outactivity["in1"], label="input 1")
# plt.plot(gv.outactivity["in2"], label="input 2")
# # plt.plot(np.array(exp.outactivity[2])*80, label="utility")
# # plt.plot(exp.utilitycontingent, label="used utility")
# for xc in exp.epslength:
# plt.axvline(x=xc, color='k')
# plt.title("Experiment")
# plt.xlabel("cycle")
# plt.legend()
# plt.show()
# for exp in exps:
# exp.join()
def parseargs():
    """Parses the command line arguments and applies them to gv."""
    parser = argparse.ArgumentParser(description='Runs an SNN RL training or grid search.')
parser.add_argument('--processes', type=int, default=multiprocessing.cpu_count(),
help='The number of cores. Currently only supporting multi-cores in grid search.')
    parser.add_argument('-g', '--gridsearch', type=str, default=None,
                        help='JSON file specifying the grid search parameters')
parser.add_argument('--headless', action='store_true', help='Do not render.')
args = parser.parse_args()
gv.headless = args.headless
if gv.headless:
gv.render = False
return args
if __name__ == "__main__":
args = parseargs()
trainingrun(num_processes=args.processes, gridsearchpath=args.gridsearch)