diff --git a/frame/configs/funsearch-low-budget.yaml b/frame/configs/funsearch-low-budget.yaml
new file mode 100644
index 0000000..9503992
--- /dev/null
+++ b/frame/configs/funsearch-low-budget.yaml
@@ -0,0 +1,79 @@
+# Low-budget settings configuration for FunSearch
+# Based on funsearch.implementation.config but with minimal resources
+
+defaults:
+  - theory_building/production_rules: default # Use all available rules
+  - theory_building/interestingness: default # Use default interestingness measures
+  - theory_building/initial_state: arithmetic_basic # Default initial state
+  # Note(George; 4/25): This is the initial state that will be used; it can be swapped out for succ_zero, succ_zero_eq, or arithmetic_basic
+  - interestingness: default # Use default HR interestingness functions
+  - _self_
+
+# Note(George; 5/5): The defaults above do not load properly in funsearch experiments due to a bug in the recursive config loading.
+# So I am just adding the required settings directly in this file instead.
+
+
+programs_database:
+  functions_per_prompt: 2
+  num_islands: 3
+  reset_period: 360000 # Reset period in seconds (100 hours)
+  cluster_sampling_temperature_init: 0.01
+  cluster_sampling_temperature_period: 1000
+samples_per_prompt: 2
+num_samplers: 2 # Two sampler workers
+num_evaluators: 2 # Two evaluator workers
+
+# LLM Config (For frame.tools.llm_caller)
+llm:
+  model_name: "gpt-4o" # Less expensive model
+  temperature: 1.0
+  top_p: 0.95
+  max_tokens: 16384 # Max completion tokens
+  # API key should be set via environment variable OPENAI_API_KEY
+
+# Path to the interestingness prompt YAML file
+prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_prompt.yaml
+
+# FRAME Evaluation Settings (used by TheoryBuilderSandbox)
+# NOTE: base_theory_builder_config is now expected via command-line override or parent config
+# tb_config_path: ??? # REQUIRED: Set via command line: python -m frame.funsearch.main tb_config_path=/path/to/your/tb_config.yaml
+tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp)
+frame:
+  evaluation_episodes_M: 16 # Reduced episode count (M)
+  evaluation_timeout_seconds: 30 # Reduced timeout to 30 seconds
+  save_eval_visualizations: false # Whether to save visualizations during evaluation
+
+# Template/Specification File
+spec_path: "${hydra:runtime.cwd}/frame/funsearch/interestingness_spec.py"
+
+# Persistence
+database_backup_path: "${hydra:runtime.output_dir}/population_backup.pkl"
+backup_frequency_seconds: 300 # Save DB every 5 minutes
+
+# Output Dirs
+output_directory: "${hydra:runtime.output_dir}"
+evaluations_dir: "${hydra:runtime.output_dir}/evaluations"
+
+# Iterations of the FunSearch sampling loop
+iterations: 32
+
+# --- FunSearch Abstraction Settings ---
+abstraction:
+  enabled: true # Master switch: set to false to disable the entire abstraction mechanism
+  frequency: 3 # Run the abstraction step every N main FunSearch iterations (low for testing).
+  programs_to_sample: 2 # How many top programs to analyze (low for testing).
+  max_abstractions_per_step: 2 # Target new abstractions per step (low for testing).
+  # Optional LLM override block for abstraction task
+  llm:
+    model_name: null # Default: null (use main llm config)
+    temperature: null # Default: null (use main llm config)
+    top_p: null # Default: null (use main llm config)
+    max_tokens: 8192 # Can override max tokens if needed
+  prompt_yaml_path: "${hydra:runtime.cwd}/frame/configs/prompts/abstraction_prompt.yaml" # Specific instructions for generating abstractions
+
+# Hydra settings
+hydra:
+  run:
+    dir: outputs/funsearch_runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
+  job:
+    chdir: true
\ No newline at end of file
diff --git a/frame/configs/funsearch-mid.yaml b/frame/configs/funsearch-mid.yaml
index 3171949..c58b76f 100644
--- a/frame/configs/funsearch-mid.yaml
+++ b/frame/configs/funsearch-mid.yaml
@@ -28,7 +28,7 @@ llm:
   model_name: "gpt-4o" # Less expensive model
   temperature: 1.0
   top_p: 0.95
-  max_tokens: 1000 # Reduced token count
+  max_tokens: 16384 # Increased token limit
   # API key should be set via environment variable OPENAI_API_KEY
 
 # Path to the interestingness prompt YAML file
@@ -41,7 +41,7 @@ tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interesti
 frame:
   evaluation_episodes_M: 64 # Reduced episodes count (M)
   evaluation_timeout_seconds: 120 # Reduced timeout to 5 minutes
-  save_eval_visualizations: true # Flag to enable visualizations during evaluation
+  save_eval_visualizations: false # Whether to save visualizations during evaluation
 
 # Template/Specification File
 spec_path: "${hydra:runtime.cwd}/frame/funsearch/interestingness_spec.py"
diff --git a/frame/configs/succ_zero_generate_interestingness_funsearch.yaml b/frame/configs/succ_zero_generate_interestingness_funsearch.yaml
index 12bc3f1..f9a2485 100644
--- a/frame/configs/succ_zero_generate_interestingness_funsearch.yaml
+++ b/frame/configs/succ_zero_generate_interestingness_funsearch.yaml
@@ -20,15 +20,32 @@ initial_state:
     - "create_successor_concept"
     - "create_equality_concept"
 
+# initial_state:
+#   _target_: frame.knowledge_base.knowledge_graph.KnowledgeGraph
+#   name: "arithmetic_basic_initial"
+#   description: "Initial knowledge graph with arithmetic basic concepts"
+
+#   # Import settings
+#   import_from: "frame.knowledge_base.initial_concepts"
+#   concepts:
+#     - "create_zero_concept"
+#     - "create_one_concept"
+#     - "create_two_concept"
+#     - "create_addition_concept"
+#     - "create_multiplication_concept"
+#     - "create_divides_concept"
+#     - "create_leq_than_concept"
+#     - "create_equality_concept"
+
 # Experiment settings override
 experiment:
   name: "succ_zero_multi_generated_interestingness_eval"
   description: "Generate N interestingness functions and evaluate each M times, starting from successor and zero."
   max_steps: 1000
-  num_episodes: 64
-  num_workers: 64
+  num_episodes: 16
+  num_workers: 2
   seed: 0 #${oc.env:RANDOM_SEED,12345}
-  episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result
+  episode_timeout_seconds: 30 # Timeout for the main process waiting for a worker episode result
 
 # --- New settings for N x M Evaluation ---
 evaluate_multiple_interestingness: True
@@ -55,7 +72,7 @@ policy:
   params:
     concept_selection: INTERESTINGNESS
     action_selection: SIMULATE_AND_SCORE # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified.
-    top_k_concepts: 8
+    top_k_concepts: 4
     temperature: 1.0
     generate_interestingness: True
     interestingness_function_path: ""
diff --git a/frame/funsearch/main.py b/frame/funsearch/main.py
index c20d663..2ed10cc 100644
--- a/frame/funsearch/main.py
+++ b/frame/funsearch/main.py
@@ -65,6 +65,7 @@ def __init__(self, cfg: DictConfig):
         """
         self.cfg = cfg
         self.iteration_rewards_log: List[Tuple[int, float, Optional[str]]] = []  # To store (iteration, reward, best_program_code_at_iteration_end)
+        self.island_progress_log: List[Tuple[int, int, float]] = []  # To store (iteration, island_id, best_score_for_island_at_iteration)
 
         # Store the path to the TheoryBuilder base configuration
         if not hasattr(self.cfg, 'tb_config_path') or not self.cfg.tb_config_path:
@@ -453,7 +454,15 @@ def run(self, iterations: int = 10):
             # Run one round of sampling and evaluation (mutation)
             logger.info("Running Mutation Sampler...")
             # Sampler uses the latest state of the database (potentially updated by abstraction)
-            self.mutation_sampler.sample(iteration=i)
+            self.mutation_sampler.sample(iteration=i)
+
+            # --- Populate island_progress_log ---
+            if hasattr(self.programs_db, '_best_score_per_island') and self.programs_db._best_score_per_island:
+                num_islands_for_log = len(self.programs_db._best_score_per_island)
+                for island_idx in range(num_islands_for_log):
+                    current_island_score = self.programs_db._best_score_per_island[island_idx]
+                    self.island_progress_log.append((i + 1, island_idx, current_island_score))
+            # --- End populate island_progress_log ---
 
             # Save the database periodically
             self.save_database()
@@ -485,6 +494,8 @@
         self._log_final_abstraction_summary()
         # Log iteration rewards summary
         self._log_iteration_rewards()
+        # Log island progress summary
+        self._log_island_progress_summary()
 
     def _log_iteration_rewards(self):
         """Logs the best reward achieved at the end of each iteration, followed by a simple list of rewards."""
@@ -507,6 +518,67 @@
 
         logger.info("--- End Iteration Rewards Summary ---")
 
+    def _log_island_progress_summary(self):
+        """Logs a summary of the best score achieved by each island at each iteration."""
+        logger.info("--- Island Progress Summary ---")  # Simplified title
+
+        # Part 1: Per-iteration scores (current bests at that iteration)
+        logger.info("--- Per-Iteration Island Best Scores ---")
+        if not self.island_progress_log:
+            logger.info("No island progress recorded for per-iteration view.")
+        else:
+            iterations_logged = sorted(list(set(item[0] for item in self.island_progress_log)))
+            for iteration_num in iterations_logged:
+                logger.info(f"  Iteration {iteration_num}:")
+                iteration_specific_logs = [log for log in self.island_progress_log if log[0] == iteration_num]
+                iteration_specific_logs.sort(key=lambda x: x[1])
+                for _, island_id, score in iteration_specific_logs:
+                    score_str = f"{score:.4f}" if isinstance(score, float) and score != float('-inf') else str(score)
+                    logger.info(f"    Island {island_id}: Current Best Score = {score_str}")
+
+        # Part 2: NEW - Historical best scores per island (list of scores over iterations)
+        logger.info("\n--- Historical Best Scores Per Island (across iterations) ---")
+        if not self.island_progress_log:
+            logger.info("No island progress recorded to show historical scores.")
+        else:
+            try:
+                num_islands = self.cfg.programs_database.num_islands
+                all_island_historical_scores = {island_idx: [] for island_idx in range(num_islands)}
+
+                # Temporary structure to hold (iteration, score) tuples for sorting
+                temp_island_scores_with_iter = {island_idx: [] for island_idx in range(num_islands)}
+
+                for iteration_num, island_id, score in self.island_progress_log:
+                    if 0 <= island_id < num_islands:  # Ensure island_id is valid
+                        temp_island_scores_with_iter[island_id].append((iteration_num, score))
+
+                for island_idx in range(num_islands):
+                    # Sort by iteration number before extracting scores
+                    sorted_scores_for_island = sorted(temp_island_scores_with_iter[island_idx], key=lambda x: x[0])
+                    # Extract just the scores for the list
+                    historical_scores_list = [
+                        f"{s:.4f}" if isinstance(s, float) and s != float('-inf') else str(s)
+                        for _, s in sorted_scores_for_island
+                    ]
+                    all_island_historical_scores[island_idx] = historical_scores_list
+                    logger.info(f"  Island {island_idx}: Best scores over iterations = {all_island_historical_scores[island_idx]}")
+            except Exception as e:
+                logger.error(f"Error generating historical scores per island: {e}", exc_info=True)
+                logger.info("Could not reliably determine number of islands or process historical data.")
+
+        # Part 3: Final best scores per island (single best score at the end)
+        logger.info("\n--- Final Best Scores Per Island ---")
+        if hasattr(self.programs_db, '_best_score_per_island') and self.programs_db._best_score_per_island:
+            final_best_scores_values = []
+            for island_idx, score in enumerate(self.programs_db._best_score_per_island):
+                score_str = f"{score:.4f}" if isinstance(score, float) and score != float('-inf') else str(score)
+                logger.info(f"  Island {island_idx}: Final Best Score = {score_str}")
+                final_best_scores_values.append(score)
+            logger.info(f"List of final best scores across islands: {[f'{s:.4f}' if isinstance(s, float) and s != float('-inf') else str(s) for s in final_best_scores_values]}")
+        else:
+            logger.info("Could not retrieve final best scores per island from the database.")
+        logger.info("--- End Island Progress Summary ---")
+
     def _get_overall_best_program_info(self) -> Tuple[float, Optional[str], int]:
         """Helper to get current best program's reward, code, and island ID."""
         best_reward = float('-inf')
diff --git a/run_parallel_funsearch.sh b/run_parallel_funsearch.sh
new file mode 100755
index 0000000..8425338
--- /dev/null
+++ b/run_parallel_funsearch.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+# Configuration
+NUM_PARALLEL_RUNS=32 # N: Number of parallel FunSearch instances per iteration
+FUNSEARCH_SCRIPT="scripts/run_funsearch_smaller_budget.sh" # Relative path from project root
+NUM_ITERATIONS=1 # M: Number of times to run the block of N parallel instances
+
+# This script assumes it is run from the project root directory (e.g., ~/Projects/FRAME)
+# The FUNSEARCH_SCRIPT path is relative to this root.
+
+# Check if the FunSearch script exists at the expected relative path
+if [ ! -f "$FUNSEARCH_SCRIPT" ]; then
+    echo "Error: FunSearch script not found at $(pwd)/$FUNSEARCH_SCRIPT"
+    echo "Please ensure you are running this script from the project root directory (e.g., ~/Projects/FRAME)."
+    exit 1
+fi
+
+# Ensure the target script is executable
+chmod +x "$FUNSEARCH_SCRIPT"
+
+echo "Starting $NUM_ITERATIONS iteration(s) of $NUM_PARALLEL_RUNS parallel FunSearch runs each."
+echo "Running from: $(pwd)" # Should be the project root
+
+for iter_num in $(seq 1 $NUM_ITERATIONS)
+do
+    echo "---------------------------------------------------"
+    echo "Starting Iteration $iter_num of $NUM_ITERATIONS..."
+ echo "---------------------------------------------------" + + for i in $(seq 1 $NUM_PARALLEL_RUNS) + do + echo "Iteration $iter_num, Launching instance $i of $NUM_PARALLEL_RUNS: $FUNSEARCH_SCRIPT" + # The FUNSEARCH_SCRIPT itself (e.g., scripts/run_funsearch_test.sh) handles cd-ing to the project root. + # We call it using its path relative to the current directory (which should be project root). + (bash "$FUNSEARCH_SCRIPT" &) + + # Wait for 2 seconds before launching the next instance, if it's not the last one in this block. + if [ "$i" -lt "$NUM_PARALLEL_RUNS" ]; then + echo "Iteration $iter_num, Instance $i: Waiting 2 seconds before launching next instance..." + sleep 2 + fi + done + + echo "Iteration $iter_num: All $NUM_PARALLEL_RUNS instances launched. Waiting for this block to complete..." + # Wait for all background jobs started in this iteration's block to complete + wait + echo "Iteration $iter_num: All $NUM_PARALLEL_RUNS instances in this block have completed." + +done + +echo "---------------------------------------------------" +echo "All $NUM_ITERATIONS iteration(s) completed." \ No newline at end of file diff --git a/scripts/run_funsearch_smaller_budget.sh b/scripts/run_funsearch_smaller_budget.sh new file mode 100755 index 0000000..ae26791 --- /dev/null +++ b/scripts/run_funsearch_smaller_budget.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Script to run FunSearch WITH the abstraction mechanism ENABLED, using Hydra defaults. + +echo "Running FunSearch experiment with abstraction enabled (using defaults from mid.yaml)..." + +# --- Configurable Parameters --- +# Most parameters are now expected to be set in configs/funsearch/mid.yaml +# You can still override specific ones via the command line below if needed. +# OUTPUT_DIR is now managed by Hydra. +# --- End Configurable Parameters --- + +# Go to the project root directory +cd "$(dirname "$0")/.." + +# Hydra will create the output directory. + +echo "Output directory will be managed by Hydra." +echo "Running with defaults from mid.yaml and:" +echo " Abstraction: ENABLED" + +# Run the FunSearch main script using Hydra overrides +# Only enabling abstraction explicitly. +nohup python -m frame.funsearch.main \ + --config-name funsearch-low-budget \ + abstraction.enabled=true + +echo "FunSearch experiment with abstraction completed!" +# Output directory location depends on Hydra configuration (e.g., ./outputs/YYYY-MM-DD/HH-MM-SS) +echo "Results saved in Hydra output directory." \ No newline at end of file