@@ -263,7 +263,7 @@ def _discount_rewards(self, last_values):
263263 return returns [::- 1 ], advantages [::- 1 ]
264264
265265
266- def play (agent , path , max_step = 50 , nb_episodes = 1 , verbose = True ):
266+ def play (agent , path , max_step = 50 , nb_episodes = 10 , verbose = True ):
267267 """
268268 This code uses the cooking agent design in the spaceship game.
269269
@@ -304,7 +304,6 @@ def play(agent, path, max_step=50, nb_episodes=1, verbose=True):
304304 command = agent .act (obs , score , done , infos )
305305 obs , score , done , infos = env .step (command )
306306 nb_moves += 1
307-
308307 agent .act (obs , score , done , infos ) # Let the agent know the game is done.
309308
310309 if verbose :
@@ -319,19 +318,26 @@ def play(agent, path, max_step=50, nb_episodes=1, verbose=True):
319318 if os .path .isdir (path ):
320319 print (msg .format (np .mean (avg_moves ), np .mean (avg_norm_scores ), 1 ))
321320 else :
321+ print (avg_scores )
322322 print (msg .format (np .mean (avg_moves ), np .mean (avg_scores ), infos ["max_score" ]))
323323
324324
# Top-level driver: train the agent on one game, run a test episode, then
# persist the agent object to disk.
agent = NeuralAgent()
step_size = 750  # Per-episode move budget passed to play() as max_step.
game_path = "./games/levelMedium_v1.ulx"  # Single source of truth for the game file.

print(" ===== Training ===================================================== ")
agent.train()  # Tell the agent it should update its parameters.
start_time = time()
# Show the resolved absolute path so a wrong working directory is obvious.
print(os.path.realpath(game_path))
play(agent, game_path, max_step=step_size, nb_episodes=2000, verbose=False)
print("Trained in {:.2f} secs".format(time() - start_time))

print(' ===== Test ========================================================= ')
# NOTE(review): the meaning of method='random' is defined by NeuralAgent.test,
# which is not visible here -- confirm it is the intended evaluation policy.
agent.test(method='random')
play(agent, game_path, max_step=step_size)  # Medium level game.

save_path = "./model/levelMedium_v1_random.npy"
# makedirs(exist_ok=True) creates missing parents and avoids the
# check-then-create race of `if not exists: mkdir`.
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# `agent` is not an ndarray, so NumPy stores it as a pickled 0-d object array.
# Reading it back requires np.load(save_path, allow_pickle=True); only load
# such files from trusted sources, since unpickling can execute arbitrary code.
np.save(save_path, agent, allow_pickle=True)
0 commit comments