79 changes: 79 additions & 0 deletions frame/configs/funsearch-low-budget.yaml
@@ -0,0 +1,79 @@
# Low settings configuration for FunSearch
# Based on funsearch.implementation.config but with minimal resources

defaults:
- theory_building/production_rules: default # Use all available rules
- theory_building/interestingness: default # Use default interestingness measures
- theory_building/initial_state: arithmetic_basic # Default initial state
# Note(George; 4/25): This is the initial state that will be used, can be subbed out for: succ_zero, succ_zero_eq and arithmetic_basic
- interestingness: default # Use default HR interestingness functions
- _self_

# Note(George; 5/5): The defaults above do not load properly in funsearch experiments due to a bug in the recursive config loading.
# So I am just adding


programs_database:
functions_per_prompt: 2
num_islands: 3
reset_period: 360000 # 100 hours in seconds
cluster_sampling_temperature_init: 0.01
cluster_sampling_temperature_period: 1000
samples_per_prompt: 2
num_samplers: 2 # Small sampler pool for the low-budget setup
num_evaluators: 2 # Small evaluator pool for the low-budget setup

# LLM Config (For frame.tools.llm_caller)
llm:
model_name: "gpt-4o" # Less expensive model
temperature: 1.0
top_p: 0.95
max_tokens: 16384 # Token limit per LLM call
# API key should be set via environment variable OPENAI_API_KEY

# Path to the interestingness prompt YAML file
prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_prompt.yaml

# FRAME Evaluation Settings (used by TheoryBuilderSandbox)
# NOTE: base_theory_builder_config is now expected via command-line override or parent config
# tb_config_path: ??? # REQUIRED: Set via command line: python -m frame.funsearch.main tb_config_path=/path/to/your/tb_config.yaml
tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp)
frame:
evaluation_episodes_M: 16 # Reduced episodes count (M)
evaluation_timeout_seconds: 30 # Reduced timeout to 30 seconds
save_eval_visualizations: false # Flag to enable visualizations during evaluation

# Template/Specification File
spec_path: "${hydra:runtime.cwd}/frame/funsearch/interestingness_spec.py"

# Persistence
database_backup_path: "${hydra:runtime.output_dir}/population_backup.pkl"
backup_frequency_seconds: 300 # Save DB every 5 minutes

# Output Dirs
output_directory: "${hydra:runtime.output_dir}"
evaluations_dir: "${hydra:runtime.output_dir}/evaluations"

# Iterations of the Funsearch sampling loop
iterations: 32

# --- FunSearch Abstraction Settings ---
abstraction:
enabled: true # Master switch: set to false to disable the entire abstraction mechanism
frequency: 3 # Run the abstraction step every N main FunSearch iterations (low for testing).
programs_to_sample: 2 # How many top programs to analyze (low for testing).
max_abstractions_per_step: 2 # Target new abstractions per step (low for testing).
# Optional LLM override block for abstraction task
llm:
model_name: null # Default: null (use main llm config)
temperature: null # Default: null (use main llm config)
top_p: null # Default: null (use main llm config)
max_tokens: 8192 # Can override max tokens if needed
prompt_yaml_path: "${hydra:runtime.cwd}/frame/configs/prompts/abstraction_prompt.yaml" # Specific instructions for generating abstractions

# Hydra settings
hydra:
run:
dir: outputs/funsearch_runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
job:
chdir: true
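
As a quick sanity check of the new config, the plain scalar values can be inspected by loading the YAML directly. A minimal sketch, assuming OmegaConf is installed and the code runs from the repository root; Hydra-only resolvers such as ${hydra:runtime.cwd} are left unresolved here:

# Minimal sketch: load the new low-budget config and read a few plain values.
# Assumes OmegaConf is available; Hydra resolvers (${hydra:...}) stay unresolved.
from omegaconf import OmegaConf

cfg = OmegaConf.load("frame/configs/funsearch-low-budget.yaml")
print(cfg.programs_database.num_islands)  # 3
print(cfg.llm.max_tokens)                 # 16384
print(cfg.abstraction.frequency)          # 3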
4 changes: 2 additions & 2 deletions frame/configs/funsearch-mid.yaml
@@ -28,7 +28,7 @@ llm:
model_name: "gpt-4o" # Less expensive model
temperature: 1.0
top_p: 0.95
max_tokens: 1000 # Reduced token count
max_tokens: 16384 # Increased token count
# API key should be set via environment variable OPENAI_API_KEY

# Path to the interestingness prompt YAML file
@@ -41,7 +41,7 @@ tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interesti
frame:
evaluation_episodes_M: 64 # Reduced episodes count (M)
evaluation_timeout_seconds: 120 # Reduced timeout to 2 minutes
save_eval_visualizations: true # Flag to enable visualizations during evaluation
save_eval_visualizations: false # Flag to enable visualizations during evaluation

# Template/Specification File
spec_path: "${hydra:runtime.cwd}/frame/funsearch/interestingness_spec.py"
25 changes: 21 additions & 4 deletions frame/configs/succ_zero_generate_interestingness_funsearch.yaml
@@ -20,15 +20,32 @@ initial_state:
- "create_successor_concept"
- "create_equality_concept"

# initial_state:
# _target_: frame.knowledge_base.knowledge_graph.KnowledgeGraph
# name: "arithmetic_basic_initial"
# description: "Initial knowledge graph with arithmetic basic concepts"

# # Import settings
# import_from: "frame.knowledge_base.initial_concepts"
# concepts:
# - "create_zero_concept"
# - "create_one_concept"
# - "create_two_concept"
# - "create_addition_concept"
# - "create_multiplication_concept"
# - "create_divides_concept"
# - "create_leq_than_concept"
# - "create_equality_concept"

# Experiment settings override
experiment:
name: "succ_zero_multi_generated_interestingness_eval"
description: "Generate N interestingness functions and evaluate each M times, starting from successor and zero."
max_steps: 1000
num_episodes: 64
num_workers: 64
num_episodes: 16
num_workers: 2
seed: 0 #${oc.env:RANDOM_SEED,12345}
episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result
episode_timeout_seconds: 30 # Timeout for the main process waiting for a worker episode result

# --- New settings for N x M Evaluation ---
evaluate_multiple_interestingness: True
@@ -55,7 +72,7 @@ policy:
params:
concept_selection: INTERESTINGNESS
action_selection: SIMULATE_AND_SCORE # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified.
top_k_concepts: 8
top_k_concepts: 4
temperature: 1.0
generate_interestingness: True
interestingness_function_path: ""
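
To put the reduced experiment settings above in perspective, here is a rough worst-case wall-clock estimate per generated interestingness function; illustrative arithmetic only, assuming episodes are spread evenly over the workers and every episode hits the timeout:

# Illustrative worst-case estimate per interestingness function under the reduced settings.
num_episodes = 16              # was 64
num_workers = 2                # was 64
episode_timeout_seconds = 30   # was 120

worst_case_seconds = (num_episodes / num_workers) * episode_timeout_seconds
print(worst_case_seconds)      # 240.0 -> roughly 4 minutes if every episode times out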
74 changes: 73 additions & 1 deletion frame/funsearch/main.py
@@ -65,6 +65,7 @@ def __init__(self, cfg: DictConfig):
"""
self.cfg = cfg
self.iteration_rewards_log: List[Tuple[int, float, Optional[str]]] = [] # To store (iteration, reward, best_program_code_at_iteration_end)
self.island_progress_log: List[Tuple[int, int, float]] = [] # To store (iteration, island_id, best_score_for_island_at_iteration)

# Store the path to the TheoryBuilder base configuration
if not hasattr(self.cfg, 'tb_config_path') or not self.cfg.tb_config_path:
@@ -453,7 +454,15 @@ def run(self, iterations: int = 10):
# Run one round of sampling and evaluation (mutation)
logger.info("Running Mutation Sampler...")
# Sampler uses the latest state of the database (potentially updated by abstraction)
self.mutation_sampler.sample(iteration=i)

# --- Populate island_progress_log ---
if hasattr(self.programs_db, '_best_score_per_island') and self.programs_db._best_score_per_island:
num_islands_for_log = len(self.programs_db._best_score_per_island)
for island_idx in range(num_islands_for_log):
current_island_score = self.programs_db._best_score_per_island[island_idx]
self.island_progress_log.append((i + 1, island_idx, current_island_score))
# --- End populate island_progress_log ---

# Save the database periodically
self.save_database()
@@ -485,6 +494,8 @@ def run(self, iterations: int = 10):
self._log_final_abstraction_summary()
# Log iteration rewards summary
self._log_iteration_rewards()
# Log island progress summary
self._log_island_progress_summary()

def _log_iteration_rewards(self):
"""Logs the best reward achieved at the end of each iteration, followed by a simple list of rewards."""
@@ -507,6 +518,67 @@ def _log_iteration_rewards(self):

logger.info("--- End Iteration Rewards Summary ---")

def _log_island_progress_summary(self):
"""Logs a summary of the best score achieved by each island at each iteration."""
logger.info("--- Island Progress Summary ---") # Simplified title

# Part 1: Per-iteration scores (current bests at that iteration)
logger.info("--- Per-Iteration Island Best Scores ---")
if not self.island_progress_log:
logger.info("No island progress recorded for per-iteration view.")
else:
iterations_logged = sorted(list(set(item[0] for item in self.island_progress_log)))
for iteration_num in iterations_logged:
logger.info(f" Iteration {iteration_num}:")
iteration_specific_logs = [log for log in self.island_progress_log if log[0] == iteration_num]
iteration_specific_logs.sort(key=lambda x: x[1])
for _, island_id, score in iteration_specific_logs:
score_str = f"{score:.4f}" if isinstance(score, float) and score != float('-inf') else str(score)
logger.info(f" Island {island_id}: Current Best Score = {score_str}")

# Part 2: NEW - Historical best scores per island (list of scores over iterations)
logger.info("\n--- Historical Best Scores Per Island (across iterations) ---")
if not self.island_progress_log:
logger.info("No island progress recorded to show historical scores.")
else:
try:
num_islands = self.cfg.programs_database.num_islands
all_island_historical_scores = {island_idx: [] for island_idx in range(num_islands)}

# Temporary structure to hold (iteration, score) tuples for sorting
temp_island_scores_with_iter = {island_idx: [] for island_idx in range(num_islands)}

for iteration_num, island_id, score in self.island_progress_log:
if 0 <= island_id < num_islands: # Ensure island_id is valid
temp_island_scores_with_iter[island_id].append((iteration_num, score))

for island_idx in range(num_islands):
# Sort by iteration number before extracting scores
sorted_scores_for_island = sorted(temp_island_scores_with_iter[island_idx], key=lambda x: x[0])
# Extract just the scores for the list
historical_scores_list = [
f"{s:.4f}" if isinstance(s, float) and s != float('-inf') else str(s)
for _, s in sorted_scores_for_island
]
all_island_historical_scores[island_idx] = historical_scores_list
logger.info(f" Island {island_idx}: Best scores over iterations = {all_island_historical_scores[island_idx]}")
except Exception as e:
logger.error(f"Error generating historical scores per island: {e}", exc_info=True)
logger.info("Could not reliably determine number of islands or process historical data.")

# Part 3: Final best scores per island (single best score at the end)
logger.info("\n--- Final Best Scores Per Island ---")
if hasattr(self.programs_db, '_best_score_per_island') and self.programs_db._best_score_per_island:
final_best_scores_values = []
for island_idx, score in enumerate(self.programs_db._best_score_per_island):
score_str = f"{score:.4f}" if isinstance(score, float) and score != float('-inf') else str(score)
logger.info(f" Island {island_idx}: Final Best Score = {score_str}")
final_best_scores_values.append(score)
logger.info(f"List of final best scores across islands: {[f'{s:.4f}' if isinstance(s, float) and s != float('-inf') else str(s) for s in final_best_scores_values]}")
else:
logger.info("Could not retrieve final best scores per island from the database.")
logger.info("--- End Island Progress Summary ---")

def _get_overall_best_program_info(self) -> Tuple[float, Optional[str], int]:
"""Helper to get current best program's reward, code, and island ID."""
best_reward = float('-inf')
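
For reference, the grouping that the new _log_island_progress_summary method performs over island_progress_log can be sketched standalone; the (iteration, island_id, best_score) tuples below are made up for illustration:

# Standalone sketch of the per-island grouping done in _log_island_progress_summary.
# The tuples are illustrative; real entries are appended in the run() loop.
from collections import defaultdict

island_progress_log = [(1, 0, 0.12), (1, 1, 0.05), (2, 0, 0.30), (2, 1, 0.05)]

history = defaultdict(list)
for iteration, island_id, score in island_progress_log:
    history[island_id].append((iteration, score))

for island_id, entries in sorted(history.items()):
    scores = [f"{s:.4f}" for _, s in sorted(entries)]
    print(f"Island {island_id}: best scores over iterations = {scores}")
# Island 0: best scores over iterations = ['0.1200', '0.3000']
# Island 1: best scores over iterations = ['0.0500', '0.0500']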
52 changes: 52 additions & 0 deletions run_parallel_funsearch.sh
@@ -0,0 +1,52 @@
#!/bin/bash

# Configuration
NUM_PARALLEL_RUNS=32 # N: Number of parallel FunSearch instances per iteration
FUNSEARCH_SCRIPT="scripts/run_funsearch_smaller_budget.sh" # Relative path from project root
NUM_ITERATIONS=1 # M: Number of times to run the block of N parallel instances

# This script assumes it is run from the project root directory (e.g., ~/Projects/FRAME)
# The FUNSEARCH_SCRIPT path is relative to this root.

# Check if the FunSearch script exists at the expected relative path
if [ ! -f "$FUNSEARCH_SCRIPT" ]; then
echo "Error: FunSearch script not found at $(pwd)/$FUNSEARCH_SCRIPT"
echo "Please ensure you are running this script from the project root directory (e.g., ~/Projects/FRAME)."
exit 1
fi

# Ensure the target script is executable
chmod +x "$FUNSEARCH_SCRIPT"

echo "Starting $NUM_ITERATIONS iteration(s) of $NUM_PARALLEL_RUNS parallel FunSearch runs each."
echo "Running from: $(pwd)" # Should be the project root

for iter_num in $(seq 1 $NUM_ITERATIONS)
do
echo "---------------------------------------------------"
echo "Starting Iteration $iter_num of $NUM_ITERATIONS..."
echo "---------------------------------------------------"

for i in $(seq 1 $NUM_PARALLEL_RUNS)
do
echo "Iteration $iter_num, Launching instance $i of $NUM_PARALLEL_RUNS: $FUNSEARCH_SCRIPT"
# The FUNSEARCH_SCRIPT itself (e.g., scripts/run_funsearch_test.sh) handles cd-ing to the project root.
# We call it using its path relative to the current directory (which should be project root).
(bash "$FUNSEARCH_SCRIPT" &)

# Wait for 2 seconds before launching the next instance, if it's not the last one in this block.
if [ "$i" -lt "$NUM_PARALLEL_RUNS" ]; then
echo "Iteration $iter_num, Instance $i: Waiting 2 seconds before launching next instance..."
sleep 2
fi
done

echo "Iteration $iter_num: All $NUM_PARALLEL_RUNS instances launched. Waiting for this block to complete..."
# Wait for all background jobs started in this iteration's block to complete
wait
echo "Iteration $iter_num: All $NUM_PARALLEL_RUNS instances in this block have completed."

done

echo "---------------------------------------------------"
echo "All $NUM_ITERATIONS iteration(s) completed."
29 changes: 29 additions & 0 deletions scripts/run_funsearch_smaller_budget.sh
@@ -0,0 +1,29 @@
#!/bin/bash
# Script to run FunSearch WITH the abstraction mechanism ENABLED, using Hydra defaults.

echo "Running FunSearch experiment with abstraction enabled (using defaults from mid.yaml)..."

# --- Configurable Parameters ---
# Most parameters are now expected to be set in frame/configs/funsearch-low-budget.yaml
# You can still override specific ones via the command line below if needed.
# OUTPUT_DIR is now managed by Hydra.
# --- End Configurable Parameters ---

# Go to the project root directory
cd "$(dirname "$0")/.."

# Hydra will create the output directory.

echo "Output directory will be managed by Hydra."
echo "Running with defaults from mid.yaml and:"
echo " Abstraction: ENABLED"

# Run the FunSearch main script using Hydra overrides
# Only enabling abstraction explicitly.
nohup python -m frame.funsearch.main \
--config-name funsearch-low-budget \
abstraction.enabled=true

echo "FunSearch experiment with abstraction completed!"
# Output directory location depends on Hydra configuration (e.g., ./outputs/funsearch_runs/YYYY-MM-DD/HH-MM-SS)
echo "Results saved in Hydra output directory."