visual_torch_walker.py
# Run a trained DDPG agent on BipedalWalker-v3 with on-screen rendering.
import time

import gym
import numpy as np

from ddpg_torch import Agent

game = 'BipedalWalker-v3'
env = gym.make(game, hardcore=False, render_mode="human")

# BipedalWalker has a 24-dimensional observation space and 4 continuous actions.
agent = Agent(alpha=0.00005, beta=0.0005, input_dims=[24], tau=0.001,
              batch_size=64, layer1_size=400, layer2_size=300,
              n_actions=4, max_size=10_000_000)
agent.load_models(f"checkpoints/{game}")

score_history = []
for episode in range(10000):
    start = time.time()
    done = False
    truncated = False
    score = 0
    iterations = 0
    observation, info = env.reset()
    while not (done or truncated):
        action = agent.choose_action(observation)
        observation, reward, done, truncated, info = env.step(action)
        score += reward
        iterations += 1
        env.render()  # redundant with render_mode="human" (rendering happens in step), but harmless
    score_history.append(score)
    delta_t = time.time() - start
    print(f"episode {episode}: score {score:.2f}, "
          f"100 game average {np.mean(score_history[-100:]):.2f}, "
          f"took {delta_t:.1f} seconds for {iterations} iterations, "
          f"{iterations/delta_t:.1f} iterations per second, "
          f"done {done}, truncated {truncated}")