Update Battle Geese rewards to be based on steps and length.

adamjeffries · adamjeffries · commit e8a198fad977 · 2020-04-07T17:04:22.000-06:00
diff --git a/kaggle_environments/core.py b/kaggle_environments/core.py
@@ -155,16 +155,16 @@ def step(self, actions):
             action_state[index] = {**self.state[index], "action": None}
 
             if isinstance(action, DeadlineExceeded):
-                self.__debug_print(f"Timeout: {str(action)}")
+                self.debug_print(f"Timeout: {str(action)}")
                 action_state[index]["status"] = "TIMEOUT"
             elif isinstance(action, BaseException):
-                self.__debug_print(f"Error: {str(action)}")
+                self.debug_print(f"Error: {str(action)}")
                 action_state[index]["status"] = "ERROR"
             else:
                 err, data = process_schema(
                     self.__state_schema.properties.action, action)
                 if err:
-                    self.__debug_print(f"Invalid Action: {str(err)}")
+                    self.debug_print(f"Invalid Action: {str(err)}")
                     action_state[index]["status"] = "INVALID"
                 else:
                     action_state[index]["action"] = data
@@ -498,7 +498,7 @@ def __run_interpreter(self, state):
                 *args[:self.interpreter.__code__.co_argcount]))
             for agent in new_state:
                 if agent.status not in self.__state_schema.properties.status.enum:
-                    self.__debug_print(f"Invalid Action: {agent.status}")
+                    self.debug_print(f"Invalid Action: {agent.status}")
                     agent.status = "INVALID"
                 if agent.status in ["ERROR", "INVALID", "TIMEOUT"]:
                     agent.reward = None
@@ -595,6 +595,6 @@ def update_props(shared_state, state, schema_props):
 
         return update_props(self.state[0], state, self.__state_schema.properties)
 
-    def __debug_print(self, message):
+    def debug_print(self, message):
         if self.debug:
             print(message)
diff --git a/kaggle_environments/envs/battlegeese/battlegeese.json b/kaggle_environments/envs/battlegeese/battlegeese.json
@@ -33,8 +33,8 @@
     }
   },
   "reward": {
-    "description": "-1 = Lost, 0 = Draw/Ongoing, 1 = Won",
-    "enum": [-1, 0, 1],
+    "description": "The number of steps the goose has moved plus its length.",
+    "type": "integer",
     "default": 0
   },
   "observation": {
diff --git a/kaggle_environments/envs/battlegeese/battlegeese.py b/kaggle_environments/envs/battlegeese/battlegeese.py
@@ -110,6 +110,11 @@ def interpreter(state, env):
             food.append(starting_positions[index + num_agents])
         return state
 
+    # Update active agents' rewards.
+    for index, agent in enumerate(state):
+        if agent.status == "ACTIVE":
+            agent.reward = len(env.steps) + len(geese[index])
+
     # Apply the actions from active agents.
     for index, agent in enumerate(state):
         if agent.status != "ACTIVE":
@@ -121,15 +126,15 @@ def interpreter(state, env):
 
         # Wall Hit.
         if new_head == -1:
-            agent.status = f"Wall Hit: {action}"
-            agent.reward = 0
+            env.debug_print(f"Wall Hit: {action}")
+            agent.status = "INACTIVE"
             geese[index] = []
             continue
 
         # Last Body Hit.
         if len(goose) > 1 and goose[1] == new_head:
-            agent.status = f"Body Hit: {action}"
-            agent.reward = 0
+            env.debug_print(f"Body Hit: {action}")
+            agent.status = "INACTIVE"
             geese[index] = []
             continue
 
@@ -146,8 +151,8 @@ def interpreter(state, env):
         if len(env.steps) % hunger_rate == 0:
             goose.pop()
             if len(goose) == 0:
-                agent.status = f"Goose Starved: {action}"
-                agent.reward = 0
+                env.debug_print(f"Goose Starved: {action}")
+                agent.status = "INACTIVE"
                 geese[index] = []
                 continue
 
@@ -159,8 +164,8 @@ def interpreter(state, env):
     for index, agent in enumerate(state):
         for pos in geese[index]:
             if collisions[pos] > 1:
-                agent.status = f"Goose Collision: {agent.action}"
-                agent.reward = 0
+                env.debug_print(f"Goose Collision: {agent.action}")
+                agent.status = "INACTIVE"
                 geese[index] = []
                 continue
 
@@ -172,11 +177,10 @@ def interpreter(state, env):
                 available_positions.remove(pos)
         food.extend(sample(available_positions, min_food - len(food)))
 
-    # If only one ACTIVE agent left, set it's reward to 1 and make INACTIVE.
+    # If only one ACTIVE agent left, set it to INACTIVE.
     active_agents = [a for a in state if a.status == "ACTIVE"]
     if len(active_agents) == 1:
         active_agents[0].status = "INACTIVE"
-        active_agents[0].reward = 1
 
     return state