Add retry logic for task generation and improve goal observation tokens (#4081)

daveey · relh · web-flow · commit b07e5b991b13 · 2025-12-03T01:20:10.000Z
Co-authored-by: Richard Higgins <richard@softmax.com>
diff --git a/metta/cogworks/curriculum/curriculum_env.py b/metta/cogworks/curriculum/curriculum_env.py
@@ -65,10 +65,23 @@ def _add_curriculum_stats_to_info(self, info_dict: dict) -> None:
     def reset(self, *args, **kwargs):
         """Reset the environment and get a new task from curriculum."""
 
-        # Get a new task from curriculum
-        self._current_task = self._curriculum.get_task()
-        self._env.set_mg_config(self._current_task.get_env_cfg())
-        obs, info = self._env.reset(*args, **kwargs)
+        # Try to get a valid task and build the map
+        max_retries = 10
+        for attempt in range(max_retries):
+            try:
+                # Get a new task from curriculum
+                self._current_task = self._curriculum.get_task()
+                # Create the env config and build the map in try-catch
+                self._env.set_mg_config(self._current_task.get_env_cfg())
+                obs, info = self._env.reset(*args, **kwargs)
+                break
+            except Exception:
+                # If config is invalid or map building fails, request a new task
+                if attempt == max_retries - 1:
+                    # If we've exhausted retries, raise the exception
+                    raise
+                # Otherwise, try again with a new task
+                continue
 
         # Invalidate stats cache on reset
         self._stats_cache_valid = False
@@ -96,8 +109,23 @@ def step(self, *args, **kwargs):
             self._current_task.complete(mean_reward)
             # Update the curriculum algorithm with task performance for learning progress
             self._curriculum.update_task_performance(self._current_task._task_id, mean_reward)
-            self._current_task = self._curriculum.get_task()
-            self._env.set_mg_config(self._current_task.get_env_cfg())
+
+            # Try to get a valid task and build the map
+            max_retries = 10
+            for attempt in range(max_retries):
+                try:
+                    self._current_task = self._curriculum.get_task()
+                    # Create the env config and build the map in try-catch
+                    self._env.set_mg_config(self._current_task.get_env_cfg())
+                    break
+                except Exception:
+                    # If config is invalid or map building fails, return 0 reward and request a new task
+                    if attempt == max_retries - 1:
+                        # If we've exhausted retries, set rewards to 0 and continue
+                        rewards = rewards * 0
+                        break
+                    # Otherwise, try again with a new task
+                    continue
 
             # Invalidate stats cache when task changes
             self._stats_cache_valid = False
diff --git a/metta/cogworks/curriculum/learning_progress_algorithm.py b/metta/cogworks/curriculum/learning_progress_algorithm.py
@@ -98,6 +98,16 @@ def stats(self, prefix: str = "") -> Dict[str, float]:
         # Get base stats (required)
         stats = self.get_base_stats()
 
+        # Always include learning progress stats (not just when detailed logging is enabled)
+        if self.hypers.use_bidirectional:
+            lp_stats = self._get_bidirectional_detailed_stats()
+        else:
+            lp_stats = self._get_basic_detailed_stats()
+
+        # Add lp/ prefix to learning progress stats
+        for key, value in lp_stats.items():
+            stats[f"lp/{key}"] = value
+
         if self.enable_detailed_logging:
             detailed = self.get_detailed_stats()
             stats.update(detailed)
@@ -378,20 +388,13 @@ def on_task_created(self, task: CurriculumTask) -> None:
         self.invalidate_cache()
 
     def get_detailed_stats(self) -> Dict[str, float]:
-        """Get detailed stats including learning progress and slice distribution analysis."""
-        stats = super().get_detailed_stats()  # Gets slice analyzer stats
+        """Get detailed stats including slice distribution analysis.
 
-        # Always include learning progress stats (not just when detailed logging is enabled)
-        if self.hypers.use_bidirectional:
-            lp_stats = self._get_bidirectional_detailed_stats()
-        else:
-            lp_stats = self._get_basic_detailed_stats()
-
-        # Add lp/ prefix to learning progress stats
-        for key, value in lp_stats.items():
-            stats[f"lp/{key}"] = value
-
-        return stats
+        Note: Learning progress stats are always included in stats() regardless of
+        enable_detailed_logging, so they are not included here to avoid duplication.
+        """
+        # Only return slice analyzer stats (LP stats are always included in stats())
+        return super().get_detailed_stats()  # Gets slice analyzer stats
 
     def _get_bidirectional_detailed_stats(self) -> Dict[str, float]:
         """Get detailed bidirectional learning progress statistics."""
diff --git a/packages/cogames/src/cogames/cogs_vs_clips/evals/diagnostic_evals.py b/packages/cogames/src/cogames/cogs_vs_clips/evals/diagnostic_evals.py
@@ -433,12 +433,23 @@ def configure_env(self, cfg: MettaGridConfig) -> None:
 
         assembler = cfg.game.objects.get("assembler")
         if isinstance(assembler, AssemblerConfig):
+            # Check if a protocol with ["gear"] vibe already exists to avoid duplicates
+            has_gear_protocol = any(p.vibes == ["gear"] and p.min_agents == 0 for p in assembler.protocols)
+
             updated_protocols: list[ProtocolConfig] = []
+            modified_decoder = False
             for proto in assembler.protocols:
                 if proto.output_resources.get("decoder", 0) > 0:
-                    inputs = {res: 1 for res in non_clipped}
-                    updated_proto = proto.model_copy(update={"vibes": ["gear"], "input_resources": inputs})
-                    updated_protocols.append(updated_proto)
+                    if has_gear_protocol:
+                        # Preserve existing decoder recipe when a gear protocol already exists
+                        updated_protocols.append(proto)
+                    elif not modified_decoder:
+                        # Only modify the first decoder protocol to avoid duplicates
+                        inputs = {res: 1 for res in non_clipped}
+                        updated_proto = proto.model_copy(update={"vibes": ["gear"], "input_resources": inputs})
+                        updated_protocols.append(updated_proto)
+                        modified_decoder = True
+                    # Skip subsequent decoder protocols to avoid duplicates
                 else:
                     updated_protocols.append(proto)
             assembler.protocols = updated_protocols
diff --git a/packages/cogames/src/cogames/cogs_vs_clips/evals/difficulty_variants.py b/packages/cogames/src/cogames/cogs_vs_clips/evals/difficulty_variants.py
@@ -270,8 +270,9 @@ def _add_gear_protocol() -> None:
             if asm is None or not isinstance(asm, AssemblerConfig):
                 return
 
-            # Check if ['gear'] protocol already exists
-            if any(p.vibes == ["gear"] for p in asm.protocols):
+            # Check if ['gear'] protocol with same min_agents already exists
+            # C++ doesn't allow duplicate protocols with same vibes and min_agents
+            if any(p.vibes == ["gear"] and p.min_agents == 0 for p in asm.protocols):
                 return  # Already added
 
             # Add the ONE generic gear protocol for this variant
diff --git a/packages/cogames/src/cogames/cogs_vs_clips/stations.py b/packages/cogames/src/cogames/cogs_vs_clips/stations.py
@@ -119,9 +119,9 @@ def station_cfg(self) -> AssemblerConfig:
         )
 
 
-# Rare regenerates slowly. More cogs increase the amount extracted.
+# Rare, single-use. More cogs increase the amount extracted.
 class GermaniumExtractorConfig(ExtractorConfig):
-    max_uses: int = Field(default=5)
+    max_uses: int = Field(default=1)
     synergy: int = 50
 
     def station_cfg(self) -> AssemblerConfig:
diff --git a/packages/cogames/src/cogames/cogs_vs_clips/variants.py b/packages/cogames/src/cogames/cogs_vs_clips/variants.py
@@ -287,9 +287,19 @@ def modify_env(self, mission, env) -> None:
         if not isinstance(assembler_cfg, AssemblerConfig):
             raise TypeError("Expected 'assembler' to be AssemblerConfig")
         gear_outputs = {"decoder", "modulator", "scrambler", "resonator"}
-        for protocol in assembler_cfg.protocols:
-            if any(k in protocol.output_resources for k in gear_outputs):
-                protocol.vibes = ["gear"]
+
+        # Check if a protocol with ["gear"] vibe already exists
+        # If so, don't modify any protocols to avoid creating duplicates
+        has_gear_protocol = any(p.vibes == ["gear"] and p.min_agents == 0 for p in assembler_cfg.protocols)
+
+        if has_gear_protocol:
+            return
+
+        # Rewrite all gear recipes to use only the ["gear"] vibe, keep order intact.
+        assembler_cfg.protocols = [
+            p.model_copy(update={"vibes": ["gear"]}) if any(k in p.output_resources for k in gear_outputs) else p
+            for p in assembler_cfg.protocols
+        ]
 
 
 class InventoryHeartTuneVariant(MissionVariant):
diff --git a/packages/mettagrid/cpp/bindings/mettagrid_c.cpp b/packages/mettagrid/cpp/bindings/mettagrid_c.cpp
@@ -8,6 +8,7 @@
 #include <iostream>
 #include <numeric>
 #include <random>
+#include <string>
 #include <unordered_set>
 #include <vector>
 
@@ -347,6 +348,35 @@ void MettaGrid::_compute_observation(GridCoord observer_row,
     global_tokens.push_back({ObservationFeature::LastReward, reward_int});
   }
 
+  // Add goal tokens for rewarding resources when enabled
+  if (_global_obs_config.goal_obs) {
+    auto& agent = _agents[agent_idx];
+    // Track which resources we've already added goal tokens for
+    std::unordered_set<std::string> added_resources;
+    // Iterate through stat_rewards to find rewarding resources
+    for (const auto& [stat_name, reward_value] : agent->stat_rewards) {
+      // Extract resource name from stat name (e.g., "carbon.amount" -> "carbon", "carbon.gained" -> "carbon")
+      size_t dot_pos = stat_name.find('.');
+      if (dot_pos != std::string::npos) {
+        std::string resource_name = stat_name.substr(0, dot_pos);
+        // Only add one goal token per resource
+        if (added_resources.find(resource_name) == added_resources.end()) {
+          // Find the resource index in resource_names
+          for (size_t i = 0; i < resource_names.size(); i++) {
+            if (resource_names[i] == resource_name) {
+              // Get the inventory feature ID for this resource
+              ObservationType inventory_feature_id = _obs_encoder->get_inventory_feature_id(static_cast<InventoryItem>(i));
+              // Add a goal token with the resource's inventory feature ID as the value
+              global_tokens.push_back({ObservationFeature::Goal, inventory_feature_id});
+              added_resources.insert(resource_name);
+              break;
+            }
+          }
+        }
+      }
+    }
+  }
+
   // Global tokens are always at the center of the observation.
   uint8_t global_location =
       PackedCoordinate::pack(static_cast<uint8_t>(obs_height_radius), static_cast<uint8_t>(obs_width_radius));
diff --git a/packages/mettagrid/cpp/include/mettagrid/config/mettagrid_config.hpp b/packages/mettagrid/cpp/include/mettagrid/config/mettagrid_config.hpp
@@ -24,6 +24,7 @@ struct GlobalObsConfig {
   bool last_action = true;
   bool last_reward = true;
   bool compass = false;
+  bool goal_obs = false;
 };
 
 struct GameConfig {
@@ -57,15 +58,17 @@ namespace py = pybind11;
 inline void bind_global_obs_config(py::module& m) {
   py::class_<GlobalObsConfig>(m, "GlobalObsConfig")
       .def(py::init<>())
-      .def(py::init<bool, bool, bool, bool>(),
+      .def(py::init<bool, bool, bool, bool, bool>(),
            py::arg("episode_completion_pct") = true,
            py::arg("last_action") = true,
            py::arg("last_reward") = true,
-           py::arg("compass") = false)
+           py::arg("compass") = false,
+           py::arg("goal_obs") = false)
       .def_readwrite("episode_completion_pct", &GlobalObsConfig::episode_completion_pct)
       .def_readwrite("last_action", &GlobalObsConfig::last_action)
       .def_readwrite("last_reward", &GlobalObsConfig::last_reward)
-      .def_readwrite("compass", &GlobalObsConfig::compass);
+      .def_readwrite("compass", &GlobalObsConfig::compass)
+      .def_readwrite("goal_obs", &GlobalObsConfig::goal_obs);
 }
 
 inline void bind_game_config(py::module& m) {
diff --git a/packages/mettagrid/cpp/include/mettagrid/config/observation_features.hpp b/packages/mettagrid/cpp/include/mettagrid/config/observation_features.hpp
@@ -27,6 +27,7 @@ class ObservationFeaturesImpl {
     _cooldown_remaining = get("cooldown_remaining");
     _clipped = get("clipped");
     _remaining_uses = get("remaining_uses");
+    _goal = get("goal");
 
     // Initialize public members (must be done AFTER private members are set above)
     Group = _group;
@@ -40,6 +41,7 @@ class ObservationFeaturesImpl {
     CooldownRemaining = _cooldown_remaining;
     Clipped = _clipped;
     RemainingUses = _remaining_uses;
+    Goal = _goal;
   }
 
   // Get feature ID by name (throws if not found)
@@ -68,6 +70,7 @@ class ObservationFeaturesImpl {
   ObservationType CooldownRemaining;
   ObservationType Clipped;
   ObservationType RemainingUses;
+  ObservationType Goal;
 
 private:
   std::unordered_map<std::string, ObservationType> _name_to_id;
@@ -84,6 +87,7 @@ class ObservationFeaturesImpl {
   ObservationType _cooldown_remaining;
   ObservationType _clipped;
   ObservationType _remaining_uses;
+  ObservationType _goal;
 };
 
 // Global singleton instance
@@ -107,6 +111,7 @@ extern ObservationType Tag;
 extern ObservationType CooldownRemaining;
 extern ObservationType Clipped;
 extern ObservationType RemainingUses;
+extern ObservationType Goal;
 }  // namespace ObservationFeature
 
 #endif  // PACKAGES_METTAGRID_CPP_INCLUDE_METTAGRID_CONFIG_OBSERVATION_FEATURES_HPP_
diff --git a/packages/mettagrid/cpp/src/mettagrid/config/observation_features.cpp b/packages/mettagrid/cpp/src/mettagrid/config/observation_features.cpp
@@ -16,6 +16,7 @@ ObservationType Tag;
 ObservationType CooldownRemaining;
 ObservationType Clipped;
 ObservationType RemainingUses;
+ObservationType Goal;
 
 void Initialize(const std::unordered_map<std::string, ObservationType>& feature_ids) {
   _instance = std::make_shared<ObservationFeaturesImpl>(feature_ids);
@@ -32,5 +33,6 @@ void Initialize(const std::unordered_map<std::string, ObservationType>& feature_
   CooldownRemaining = _instance->CooldownRemaining;
   Clipped = _instance->Clipped;
   RemainingUses = _instance->RemainingUses;
+  Goal = _instance->Goal;
 }
 }  // namespace ObservationFeature
diff --git a/packages/mettagrid/python/src/mettagrid/config/id_map.py b/packages/mettagrid/python/src/mettagrid/config/id_map.py
@@ -93,6 +93,10 @@ def _compute_features(self) -> list[ObservationFeatureSpec]:
         features.append(ObservationFeatureSpec(id=feature_id, normalization=100.0, name="last_reward"))
         feature_id += 1
 
+        # Goal feature (for indicating rewarding resources)
+        features.append(ObservationFeatureSpec(id=feature_id, normalization=100.0, name="goal"))
+        feature_id += 1
+
         # Agent-specific features
         features.append(ObservationFeatureSpec(id=feature_id, normalization=255.0, name="vibe"))
         feature_id += 1
diff --git a/packages/mettagrid/python/src/mettagrid/config/mettagrid_c_config.py b/packages/mettagrid/python/src/mettagrid/config/mettagrid_c_config.py
@@ -347,6 +347,7 @@ def convert_to_cpp_game_config(mettagrid_config: dict | GameConfig):
         last_action=global_obs_config.last_action,
         last_reward=global_obs_config.last_reward,
         compass=global_obs_config.compass,
+        goal_obs=global_obs_config.goal_obs,
     )
     game_cpp_params["global_obs"] = global_obs_cpp
 
diff --git a/packages/mettagrid/python/src/mettagrid/config/mettagrid_config.py b/packages/mettagrid/python/src/mettagrid/config/mettagrid_config.py
@@ -194,6 +194,9 @@ class GlobalObsConfig(Config):
     # Compass token that points toward the assembler/hub center
     compass: bool = Field(default=False)
 
+    # Goal tokens that indicate rewarding resources
+    goal_obs: bool = Field(default=False)
+
 
 class GridObjectConfig(Config):
     """Base configuration for all grid objects.
diff --git a/packages/mettagrid/python/src/mettagrid/mapgen/scenes/bsp.py b/packages/mettagrid/python/src/mettagrid/mapgen/scenes/bsp.py
@@ -98,8 +98,19 @@ def render(self):
             if zone1.y > zone2.y:
                 (zone1, zone2) = (zone2, zone1)
 
-            surface1 = Surface.from_zone(used_grid, zone1, "up")
-            surface2 = Surface.from_zone(used_grid, zone2, "down")
+            try:
+                surface1 = Surface.from_zone(used_grid, zone1, "up")
+                surface2 = Surface.from_zone(used_grid, zone2, "down")
+            except ValueError as e:
+                # Skip corridor connection if surfaces can't be found
+                # This can happen if a zone doesn't have a valid room
+                logger.warning(
+                    "Could not find surface for corridor connection between zones %s and %s: %s. Skipping corridor.",
+                    zone1,
+                    zone2,
+                    e,
+                )
+                continue
 
             lines = connect_surfaces(surface1, surface2)
 
diff --git a/packages/mettagrid/tests/test_mettagrid.cpp b/packages/mettagrid/tests/test_mettagrid.cpp
@@ -55,6 +55,7 @@ class MettaGridCppTest : public ::testing::Test {
         {"agent:frozen", 2},
         {"episode_completion_pct", 7},
         {"last_action", 8},
+        {"goal", 9},
         {"last_reward", 10},
         {"vibe", 11},
         {"agent:vibe", 12},
diff --git a/recipes/experiment/cogs_v_clips.py b/recipes/experiment/cogs_v_clips.py

Original file line number	Diff line number	Diff line change
`@@ -119,9 +119,9 @@ def station_cfg(self) -> AssemblerConfig:`
`119`	`119`	`)`
`120`	`120`
`121`	`121`
`122`		`-# Rare regenerates slowly. More cogs increase the amount extracted.`
	`122`	`+# Rare, single-use. More cogs increase the amount extracted.`
`123`	`123`	`class GermaniumExtractorConfig(ExtractorConfig):`
`124`		`- max_uses: int = Field(default=5)`
	`124`	`+ max_uses: int = Field(default=1)`
`125`	`125`	`synergy: int = 50`
`126`	`126`
`127`	`127`	`def station_cfg(self) -> AssemblerConfig:`