-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathgrid-world.py
More file actions
65 lines (57 loc) · 3.41 KB
/
grid-world.py
File metadata and controls
65 lines (57 loc) · 3.41 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""
Lifelong RL experiment in constant transition function setting
"""
import numpy as np
from llrl.agents.rmax import RMax
from llrl.agents.lrmax import LRMax
from llrl.agents.maxqinit import MaxQInit
from llrl.agents.lrmaxqinit import LRMaxQInit
from llrl.utils.env_handler import make_env_distribution
from llrl.experiments import run_agents_lifelong
def experiment():
    """Run a lifelong RL comparison on grid-world tasks with a fixed transition function.

    Samples a distribution of 20x20 grid-world environments, instantiates the
    six agents under comparison (RMax, LRMax with and without a 0.2 Dmax prior,
    MaxQInit, LRMaxQInit with and without a 0.2 Dmax prior), and launches the
    lifelong-learning run over 100 tasks of 100 episodes each.
    """
    # --- Experiment hyper-parameters ---
    gamma = .9
    n_env = 5
    w, h = 20, 20
    n_states = w * h
    env_distribution = make_env_distribution(env_class='grid-world', env_name='grid-world', n_env=n_env,
                                             gamma=gamma, w=w, h=h)
    actions = env_distribution.get_actions()
    n_known = 1
    p_min = 1. / float(n_env)  # minimum task-sampling probability, uniform over the n_env tasks
    r_max = 1.
    v_max = 10.
    epsilon_q = .01
    epsilon_m = .01
    delta = .1
    max_mem = 1

    # Keyword groups shared by the agents, so each constructor call only
    # spells out what actually differs between agents.
    base_kwargs = dict(actions=actions, gamma=gamma, r_max=r_max, v_max=v_max, deduce_v_max=False,
                       n_known=n_known, deduce_n_known=False, epsilon_q=epsilon_q, epsilon_m=epsilon_m)
    lifelong_kwargs = dict(delta=delta, n_states=n_states, min_sampling_probability=p_min)

    # --- Agents under comparison ---
    rmax = RMax(name='RMax', **base_kwargs)
    lrmax = LRMax(max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                  name='LRMax', **base_kwargs, **lifelong_kwargs)
    lrmaxprior02 = LRMax(max_memory_size=max_mem, prior=0.2, estimate_distances_online=False,
                         name='LRMax(Dmax0.2)', **base_kwargs, **lifelong_kwargs)
    maxqinit = MaxQInit(name='MaxQInit', **base_kwargs, **lifelong_kwargs)
    lrmaxqinit = LRMaxQInit(max_memory_size=max_mem, prior=None, estimate_distances_online=True,
                            name='LRMaxQInit', **base_kwargs, **lifelong_kwargs)
    lrmaxqinitprior02 = LRMaxQInit(max_memory_size=max_mem, prior=0.2, estimate_distances_online=True,
                                   name='LRMaxQInit(Dmax0.2)', **base_kwargs, **lifelong_kwargs)
    agents_pool = [rmax, lrmax, lrmaxprior02, maxqinit, lrmaxqinit, lrmaxqinitprior02]

    # --- Run the lifelong experiment (in parallel) and plot the results ---
    run_agents_lifelong(agents_pool, env_distribution, name_identifier=None, n_instances=1, n_tasks=100,
                        n_episodes=100, n_steps=13, reset_at_terminal=False, open_plot=False,
                        plot_title=True, do_run=True, do_plot=True, parallel_run=True, n_processes=None)
if __name__ == '__main__':
    # Fix the global NumPy RNG seed so environment sampling and any stochastic
    # agent behavior are reproducible across runs.
    np.random.seed(1993)
    experiment()