@@ -110,6 +110,11 @@ def interpreter(state, env):
110110 food .append (starting_positions [index + num_agents ])
111111 return state
112112
113+ # Update active agents' rewards.
114+ for index , agent in enumerate (state ):
115+ if agent .status == "ACTIVE" :
116+ agent .reward = len (env .steps ) + len (geese [index ])
117+
113118 # Apply the actions from active agents.
114119 for index , agent in enumerate (state ):
115120 if agent .status != "ACTIVE" :
@@ -121,15 +126,15 @@ def interpreter(state, env):
121126
122127 # Wall Hit.
123128 if new_head == - 1 :
124- agent . status = f"Wall Hit: { action } "
125- agent .reward = 0
129+ env . debug_print ( f"Wall Hit: { action } " )
130+ agent .status = "INACTIVE"
126131 geese [index ] = []
127132 continue
128133
129134 # Last Body Hit.
130135 if len (goose ) > 1 and goose [1 ] == new_head :
131- agent . status = f"Body Hit: { action } "
132- agent .reward = 0
136+ env . debug_print ( f"Body Hit: { action } " )
137+ agent .status = "INACTIVE"
133138 geese [index ] = []
134139 continue
135140
@@ -146,8 +151,8 @@ def interpreter(state, env):
146151 if len (env .steps ) % hunger_rate == 0 :
147152 goose .pop ()
148153 if len (goose ) == 0 :
149- agent . status = f"Goose Starved: { action } "
150- agent .reward = 0
154+ env . debug_print ( f"Goose Starved: { action } " )
155+ agent .status = "INACTIVE"
151156 geese [index ] = []
152157 continue
153158
@@ -159,8 +164,8 @@ def interpreter(state, env):
159164 for index , agent in enumerate (state ):
160165 for pos in geese [index ]:
161166 if collisions [pos ] > 1 :
162- agent . status = f"Goose Collision: { agent .action } "
163- agent .reward = 0
167+ env . debug_print ( f"Goose Collision: { agent .action } " )
168+ agent .status = "INACTIVE"
164169 geese [index ] = []
165170 continue
166171
@@ -172,11 +177,10 @@ def interpreter(state, env):
172177 available_positions .remove (pos )
173178 food .extend (sample (available_positions , min_food - len (food )))
174179
175- # If only one ACTIVE agent left, set it's reward to 1 and make INACTIVE.
180+ # If only one ACTIVE agent left, set it to INACTIVE.
176181 active_agents = [a for a in state if a .status == "ACTIVE" ]
177182 if len (active_agents ) == 1 :
178183 active_agents [0 ].status = "INACTIVE"
179- active_agents [0 ].reward = 1
180184
181185 return state
182186
0 commit comments