79 changes: 79 additions & 0 deletions frame/configs/funsearch-low-budget.yaml
@@ -0,0 +1,79 @@
# Low settings configuration for FunSearch
# Based on funsearch.implementation.config but with minimal resources

defaults:
- theory_building/production_rules: default # Use all available rules
- theory_building/interestingness: default # Use default interestingness measures
- theory_building/initial_state: arithmetic_basic # Default initial state
# Note(George; 4/25): This is the initial state that will be used, can be subbed out for: succ_zero, succ_zero_eq and arithmetic_basic
- interestingness: default # Use default HR interestingness functions
- _self_

# Note(George; 5/5): The defaults above do not load properly in funsearch experiments due to a bug in the recursive config loading.
# So I am just adding


programs_database:
functions_per_prompt: 2
num_islands: 3
reset_period: 360000 # 100 hours in seconds
cluster_sampling_temperature_init: 0.01
cluster_sampling_temperature_period: 1000
samples_per_prompt: 2
num_samplers: 2 # Small sampler pool for the low-budget setup
num_evaluators: 2 # Small evaluator pool for the low-budget setup

# LLM Config (For frame.tools.llm_caller)
llm:
model_name: "gpt-4o" # Less expensive model
temperature: 1.0
top_p: 0.95
max_tokens: 16384 # Token limit per LLM call
# API key should be set via environment variable OPENAI_API_KEY

# Path to the interestingness prompt YAML file
prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_prompt.yaml

# FRAME Evaluation Settings (used by TheoryBuilderSandbox)
# NOTE: base_theory_builder_config is now expected via command-line override or parent config
# tb_config_path: ??? # REQUIRED: Set via command line: python -m frame.funsearch.main tb_config_path=/path/to/your/tb_config.yaml
tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp)
frame:
evaluation_episodes_M: 16 # Reduced episodes count (M)
evaluation_timeout_seconds: 30 # Reduced timeout to 30 seconds
save_eval_visualizations: false # Flag to enable visualizations during evaluation

# Template/Specification File
spec_path: "${hydra:runtime.cwd}/frame/funsearch/interestingness_spec.py"

# Persistence
database_backup_path: "${hydra:runtime.output_dir}/population_backup.pkl"
backup_frequency_seconds: 300 # Save DB every 5 minutes

# Output Dirs
output_directory: "${hydra:runtime.output_dir}"
evaluations_dir: "${hydra:runtime.output_dir}/evaluations"

# Iterations of the Funsearch sampling loop
iterations: 32

# --- FunSearch Abstraction Settings ---
abstraction:
enabled: true # Master switch: set to false to disable the entire abstraction mechanism
frequency: 3 # Run the abstraction step every N main FunSearch iterations (low for testing).
programs_to_sample: 2 # How many top programs to analyze (low for testing).
max_abstractions_per_step: 2 # Target new abstractions per step (low for testing).
# Optional LLM override block for abstraction task
llm:
model_name: null # Default: null (use main llm config)
temperature: null # Default: null (use main llm config)
top_p: null # Default: null (use main llm config)
max_tokens: 8192 # Can override max tokens if needed
prompt_yaml_path: "${hydra:runtime.cwd}/frame/configs/prompts/abstraction_prompt.yaml" # Specific instructions for generating abstractions

# Hydra settings
hydra:
run:
dir: outputs/funsearch_runs/${now:%Y-%m-%d}/${now:%H-%M-%S}
job:
chdir: true
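
As a quick sanity check of the new config, the plain scalar values can be inspected by loading the YAML directly. A minimal sketch, assuming OmegaConf is installed and the code runs from the repository root; Hydra-only resolvers such as ${hydra:runtime.cwd} are left unresolved here:

# Minimal sketch: load the new low-budget config and read a few plain values.
# Assumes OmegaConf is available; Hydra resolvers (${hydra:...}) stay unresolved.
from omegaconf import OmegaConf

cfg = OmegaConf.load("frame/configs/funsearch-low-budget.yaml")
print(cfg.programs_database.num_islands)  # 3
print(cfg.llm.max_tokens)                 # 16384
print(cfg.abstraction.frequency)          # 3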
4 changes: 2 additions & 2 deletions frame/configs/funsearch-mid.yaml
@@ -28,7 +28,7 @@ llm:
model_name: "gpt-4o" # Less expensive model
temperature: 1.0
top_p: 0.95
max_tokens: 1000 # Reduced token count
max_tokens: 16384 # Increased token count
# API key should be set via environment variable OPENAI_API_KEY

# Path to the interestingness prompt YAML file
@@ -41,7 +41,7 @@ tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interesti
frame:
evaluation_episodes_M: 64 # Reduced episodes count (M)
evaluation_timeout_seconds: 120 # Reduced timeout to 2 minutes
save_eval_visualizations: true # Flag to enable visualizations during evaluation
save_eval_visualizations: false # Flag to enable visualizations during evaluation

# Template/Specification File
spec_path: "${hydra:runtime.cwd}/frame/funsearch/interestingness_spec.py"
25 changes: 21 additions & 4 deletions frame/configs/succ_zero_generate_interestingness_funsearch.yaml
@@ -20,15 +20,32 @@ initial_state:
- "create_successor_concept"
- "create_equality_concept"

# initial_state:
# _target_: frame.knowledge_base.knowledge_graph.KnowledgeGraph
# name: "arithmetic_basic_initial"
# description: "Initial knowledge graph with arithmetic basic concepts"

# # Import settings
# import_from: "frame.knowledge_base.initial_concepts"
# concepts:
# - "create_zero_concept"
# - "create_one_concept"
# - "create_two_concept"
# - "create_addition_concept"
# - "create_multiplication_concept"
# - "create_divides_concept"
# - "create_leq_than_concept"
# - "create_equality_concept"

# Experiment settings override
experiment:
name: "succ_zero_multi_generated_interestingness_eval"
description: "Generate N interestingness functions and evaluate each M times, starting from successor and zero."
max_steps: 1000
num_episodes: 64
num_workers: 64
num_episodes: 16
num_workers: 2
seed: 0 #${oc.env:RANDOM_SEED,12345}
episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result
episode_timeout_seconds: 30 # Timeout for the main process waiting for a worker episode result

# --- New settings for N x M Evaluation ---
evaluate_multiple_interestingness: True
@@ -55,7 +72,7 @@ policy:
params:
concept_selection: INTERESTINGNESS
action_selection: SIMULATE_AND_SCORE # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified.
top_k_concepts: 8
top_k_concepts: 4
temperature: 1.0
generate_interestingness: True
interestingness_function_path: ""
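
To put the reduced experiment settings above in perspective, here is a rough worst-case wall-clock estimate per generated interestingness function; illustrative arithmetic only, assuming episodes are spread evenly over the workers and every episode hits the timeout:

# Illustrative worst-case estimate per interestingness function under the reduced settings.
num_episodes = 16              # was 64
num_workers = 2                # was 64
episode_timeout_seconds = 30   # was 120

worst_case_seconds = (num_episodes / num_workers) * episode_timeout_seconds
print(worst_case_seconds)      # 240.0 -> roughly 4 minutes if every episode times out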
74 changes: 73 additions & 1 deletion frame/funsearch/main.py
@@ -65,6 +65,7 @@ def __init__(self, cfg: DictConfig):
"""
self.cfg = cfg
self.iteration_rewards_log: List[Tuple[int, float, Optional[str]]] = [] # To store (iteration, reward, best_program_code_at_iteration_end)
self.island_progress_log: List[Tuple[int, int, float]] = [] # To store (iteration, island_id, best_score_for_island_at_iteration)

# Store the path to the TheoryBuilder base configuration
if not hasattr(self.cfg, 'tb_config_path') or not self.cfg.tb_config_path:
@@ -453,7 +454,15 @@ def run(self, iterations: int = 10):
# Run one round of sampling and evaluation (mutation)
logger.info("Running Mutation Sampler...")
# Sampler uses the latest state of the database (potentially updated by abstraction)
self.mutation_sampler.sample(iteration=i)

# --- Populate island_progress_log ---
if hasattr(self.programs_db, '_best_score_per_island') and self.programs_db._best_score_per_island:
num_islands_for_log = len(self.programs_db._best_score_per_island)
for island_idx in range(num_islands_for_log):
current_island_score = self.programs_db._best_score_per_island[island_idx]
self.island_progress_log.append((i + 1, island_idx, current_island_score))
# --- End populate island_progress_log ---

# Save the database periodically
self.save_database()
@@ -485,6 +494,8 @@ def run(self, iterations: int = 10):
self._log_final_abstraction_summary()
# Log iteration rewards summary
self._log_iteration_rewards()
# Log island progress summary
self._log_island_progress_summary()

def _log_iteration_rewards(self):
"""Logs the best reward achieved at the end of each iteration, followed by a simple list of rewards."""
@@ -507,6 +518,67 @@ def _log_iteration_rewards(self):

logger.info("--- End Iteration Rewards Summary ---")

def _log_island_progress_summary(self):
"""Logs a summary of the best score achieved by each island at each iteration."""
logger.info("--- Island Progress Summary ---") # Simplified title

# Part 1: Per-iteration scores (current bests at that iteration)
logger.info("--- Per-Iteration Island Best Scores ---")
if not self.island_progress_log:
logger.info("No island progress recorded for per-iteration view.")
else:
iterations_logged = sorted(list(set(item[0] for item in self.island_progress_log)))
for iteration_num in iterations_logged:
logger.info(f" Iteration {iteration_num}:")
iteration_specific_logs = [log for log in self.island_progress_log if log[0] == iteration_num]
iteration_specific_logs.sort(key=lambda x: x[1])
for _, island_id, score in iteration_specific_logs:
score_str = f"{score:.4f}" if isinstance(score, float) and score != float('-inf') else str(score)
logger.info(f" Island {island_id}: Current Best Score = {score_str}")

# Part 2: NEW - Historical best scores per island (list of scores over iterations)
logger.info("\n--- Historical Best Scores Per Island (across iterations) ---")
if not self.island_progress_log:
logger.info("No island progress recorded to show historical scores.")
else:
try:
num_islands = self.cfg.programs_database.num_islands
all_island_historical_scores = {island_idx: [] for island_idx in range(num_islands)}

# Temporary structure to hold (iteration, score) tuples for sorting
temp_island_scores_with_iter = {island_idx: [] for island_idx in range(num_islands)}

for iteration_num, island_id, score in self.island_progress_log:
if 0 <= island_id < num_islands: # Ensure island_id is valid
temp_island_scores_with_iter[island_id].append((iteration_num, score))

for island_idx in range(num_islands):
# Sort by iteration number before extracting scores
sorted_scores_for_island = sorted(temp_island_scores_with_iter[island_idx], key=lambda x: x[0])
# Extract just the scores for the list
historical_scores_list = [
f"{s:.4f}" if isinstance(s, float) and s != float('-inf') else str(s)
for _, s in sorted_scores_for_island
]
all_island_historical_scores[island_idx] = historical_scores_list
logger.info(f" Island {island_idx}: Best scores over iterations = {all_island_historical_scores[island_idx]}")
except Exception as e:
logger.error(f"Error generating historical scores per island: {e}", exc_info=True)
logger.info("Could not reliably determine number of islands or process historical data.")

# Part 3: Final best scores per island (single best score at the end)
logger.info("\n--- Final Best Scores Per Island ---")
if hasattr(self.programs_db, '_best_score_per_island') and self.programs_db._best_score_per_island:
final_best_scores_values = []
for island_idx, score in enumerate(self.programs_db._best_score_per_island):
score_str = f"{score:.4f}" if isinstance(score, float) and score != float('-inf') else str(score)
logger.info(f" Island {island_idx}: Final Best Score = {score_str}")
final_best_scores_values.append(score)
logger.info(f"List of final best scores across islands: {[f'{s:.4f}' if isinstance(s, float) and s != float('-inf') else str(s) for s in final_best_scores_values]}")
else:
logger.info("Could not retrieve final best scores per island from the database.")
logger.info("--- End Island Progress Summary ---")

def _get_overall_best_program_info(self) -> Tuple[float, Optional[str], int]:
"""Helper to get current best program's reward, code, and island ID."""
best_reward = float('-inf')
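
For reference, the grouping that the new _log_island_progress_summary method performs over island_progress_log can be sketched standalone; the (iteration, island_id, best_score) tuples below are made up for illustration:

# Standalone sketch of the per-island grouping done in _log_island_progress_summary.
# The tuples are illustrative; real entries are appended in the run() loop.
from collections import defaultdict

island_progress_log = [(1, 0, 0.12), (1, 1, 0.05), (2, 0, 0.30), (2, 1, 0.05)]

history = defaultdict(list)
for iteration, island_id, score in island_progress_log:
    history[island_id].append((iteration, score))

for island_id, entries in sorted(history.items()):
    scores = [f"{s:.4f}" for _, s in sorted(entries)]
    print(f"Island {island_id}: best scores over iterations = {scores}")
# Island 0: best scores over iterations = ['0.1200', '0.3000']
# Island 1: best scores over iterations = ['0.0500', '0.0500']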
52 changes: 52 additions & 0 deletions run_parallel_funsearch.sh
@@ -0,0 +1,52 @@
#!/bin/bash

# Configuration
NUM_PARALLEL_RUNS=32 # N: Number of parallel FunSearch instances per iteration
FUNSEARCH_SCRIPT="scripts/run_funsearch_smaller_budget.sh" # Relative path from project root
NUM_ITERATIONS=1 # M: Number of times to run the block of N parallel instances

# This script assumes it is run from the project root directory (e.g., ~/Projects/FRAME)
# The FUNSEARCH_SCRIPT path is relative to this root.

# Check if the FunSearch script exists at the expected relative path
if [ ! -f "$FUNSEARCH_SCRIPT" ]; then
echo "Error: FunSearch script not found at $(pwd)/$FUNSEARCH_SCRIPT"
echo "Please ensure you are running this script from the project root directory (e.g., ~/Projects/FRAME)."
exit 1
fi

# Ensure the target script is executable
chmod +x "$FUNSEARCH_SCRIPT"

echo "Starting $NUM_ITERATIONS iteration(s) of $NUM_PARALLEL_RUNS parallel FunSearch runs each."
echo "Running from: $(pwd)" # Should be the project root

for iter_num in $(seq 1 $NUM_ITERATIONS)
do
echo "---------------------------------------------------"
echo "Starting Iteration $iter_num of $NUM_ITERATIONS..."
echo "---------------------------------------------------"

for i in $(seq 1 $NUM_PARALLEL_RUNS)
do
echo "Iteration $iter_num, Launching instance $i of $NUM_PARALLEL_RUNS: $FUNSEARCH_SCRIPT"
# The FUNSEARCH_SCRIPT itself (e.g., scripts/run_funsearch_test.sh) handles cd-ing to the project root.
# We call it using its path relative to the current directory (which should be project root).
(bash "$FUNSEARCH_SCRIPT" &)

# Wait for 2 seconds before launching the next instance, if it's not the last one in this block.
if [ "$i" -lt "$NUM_PARALLEL_RUNS" ]; then
echo "Iteration $iter_num, Instance $i: Waiting 2 seconds before launching next instance..."
sleep 2
fi
done

echo "Iteration $iter_num: All $NUM_PARALLEL_RUNS instances launched. Waiting for this block to complete..."
# Wait for all background jobs started in this iteration's block to complete
wait
echo "Iteration $iter_num: All $NUM_PARALLEL_RUNS instances in this block have completed."

done

echo "---------------------------------------------------"
echo "All $NUM_ITERATIONS iteration(s) completed."
29 changes: 29 additions & 0 deletions scripts/run_funsearch_smaller_budget.sh
@@ -0,0 +1,29 @@
#!/bin/bash
# Script to run FunSearch WITH the abstraction mechanism ENABLED, using Hydra defaults.

echo "Running FunSearch experiment with abstraction enabled (using defaults from mid.yaml)..."

# --- Configurable Parameters ---
# Most parameters are now expected to be set in frame/configs/funsearch-low-budget.yaml
# You can still override specific ones via the command line below if needed.
# OUTPUT_DIR is now managed by Hydra.
# --- End Configurable Parameters ---

# Go to the project root directory
cd "$(dirname "$0")/.."

# Hydra will create the output directory.

echo "Output directory will be managed by Hydra."
echo "Running with defaults from mid.yaml and:"
echo " Abstraction: ENABLED"

# Run the FunSearch main script using Hydra overrides
# Only enabling abstraction explicitly.
nohup python -m frame.funsearch.main \
--config-name funsearch-low-budget \
abstraction.enabled=true

echo "FunSearch experiment with abstraction completed!"
# Output directory location depends on Hydra configuration (e.g., ./outputs/funsearch_runs/YYYY-MM-DD/HH-MM-SS)
echo "Results saved in Hydra output directory."