20 changes: 10 additions & 10 deletions frame/configs/funsearch-mid.yaml
@@ -15,18 +15,18 @@ defaults:

 programs_database:
   functions_per_prompt: 2
-  num_islands: 2
-  reset_period: 3600 # 1 hour in seconds
-  cluster_sampling_temperature_init: 0.1
-  cluster_sampling_temperature_period: 30000
+  num_islands: 4
+  reset_period: 360000 # 100 hours in seconds
+  cluster_sampling_temperature_init: 0.01
+  cluster_sampling_temperature_period: 1000
   samples_per_prompt: 2
   num_samplers: 2 # Single-threaded setup
   num_evaluators: 2 # Single-threaded setup
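Note: in FunSearch-style program databases, `cluster_sampling_temperature_init` typically sets the softmax temperature over cluster scores, so 0.01 is close to greedy selection. A minimal sketch of that mechanism, assuming this codebase follows the published FunSearch recipe (the helper below is illustrative, not frame's actual API):

```python
import numpy as np

def sample_cluster(scores: list[float], temperature: float) -> int:
    """Pick a cluster index via a softmax over scores; lower temperature
    concentrates sampling on the best-scoring clusters (0.01 is near-greedy)."""
    logits = np.asarray(scores) / max(temperature, 1e-9)  # guard against T == 0
    probs = np.exp(logits - logits.max())                 # numerically stable softmax
    probs /= probs.sum()
    return int(np.random.choice(len(scores), p=probs))

print(sample_cluster([0.2, 0.9, 0.5], temperature=0.01))  # almost always index 1
```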

 # LLM Config (For frame.tools.llm_caller)
 llm:
-  model_name: "gpt-4o-mini" # Less expensive model
-  temperature: 0.8
+  model_name: "gpt-4o" # More capable model
+  temperature: 1.0
   top_p: 0.95
   max_tokens: 1000 # Reduced token count
   # API key should be set via environment variable OPENAI_API_KEY
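Assuming `frame.tools.llm_caller` wraps the standard OpenAI chat API, these fields map onto a call roughly like the following (a sketch, not the module's actual code; the message content is a placeholder):

```python
from openai import OpenAI  # reads OPENAI_API_KEY from the environment

client = OpenAI()
response = client.chat.completions.create(
    model="gpt-4o",      # llm.model_name
    temperature=1.0,     # llm.temperature
    top_p=0.95,          # llm.top_p
    max_tokens=1000,     # llm.max_tokens
    messages=[{"role": "user", "content": "..."}],  # placeholder prompt
)
print(response.choices[0].message.content)
```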
@@ -39,8 +39,8 @@ prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_prompt.yaml
 # tb_config_path: ??? # REQUIRED: Set via command line: python -m frame.funsearch.main tb_config_path=/path/to/your/tb_config.yaml
 tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp)
 frame:
-  evaluation_episodes_M: 8 # Reduced episodes count (M)
-  evaluation_timeout_seconds: 30 # Reduced timeout to 5 minutes
+  evaluation_episodes_M: 64 # Episodes count (M)
+  evaluation_timeout_seconds: 120 # 2-minute timeout
   save_eval_visualizations: true # Flag to enable visualizations during evaluation

# Template/Specification File
@@ -60,15 +60,15 @@ iterations: 128
 # --- FunSearch Abstraction Settings ---
 abstraction:
   enabled: true # Master switch: set to false to disable the entire abstraction mechanism
-  frequency: 5 # Run the abstraction step every N main FunSearch iterations (low for testing).
+  frequency: 4 # Run the abstraction step every N main FunSearch iterations (low for testing).
   programs_to_sample: 3 # How many top programs to analyze (low for testing).
   max_abstractions_per_step: 2 # Target new abstractions per step (low for testing).
   # Optional LLM override block for abstraction task
   llm:
     model_name: null # Default: null (use main llm config)
     temperature: null # Default: null (use main llm config)
     top_p: null # Default: null (use main llm config)
-    max_tokens: 4096 # Can override max tokens if needed
+    max_tokens: 8192 # Can override max tokens if needed
   prompt_yaml_path: "${hydra:runtime.cwd}/frame/configs/prompts/abstraction_prompt.yaml" # Specific instructions for generating abstractions
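The null defaults above imply a merge where unset override fields fall back to the main `llm` block. A minimal sketch of that pattern (hypothetical helper, not frame's actual resolution code):

```python
def resolve_llm_config(main_llm: dict, override: dict) -> dict:
    """Return main_llm with any non-None override values applied on top."""
    resolved = dict(main_llm)
    resolved.update({k: v for k, v in override.items() if v is not None})
    return resolved

# Only max_tokens is overridden for the abstraction task; everything else falls back.
main_llm = {"model_name": "gpt-4o", "temperature": 1.0, "top_p": 0.95, "max_tokens": 1000}
override = {"model_name": None, "temperature": None, "top_p": None, "max_tokens": 8192}
assert resolve_llm_config(main_llm, override)["max_tokens"] == 8192
assert resolve_llm_config(main_llm, override)["model_name"] == "gpt-4o"
```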

# Hydra settings
@@ -10,21 +10,21 @@ defaults:
   - _self_

 programs_database:
-  functions_per_prompt: 1
+  functions_per_prompt: 2
   num_islands: 1
   reset_period: 3600 # 1 hour in seconds
-  cluster_sampling_temperature_init: 0.1
+  cluster_sampling_temperature_init: 0.01
   cluster_sampling_temperature_period: 30000
   samples_per_prompt: 2
   num_samplers: 1 # Single-threaded setup
   num_evaluators: 1 # Single-threaded setup

 # LLM Config (For frame.tools.llm_caller)
 llm:
-  model_name: "gpt-4o-mini" # Less expensive model
-  temperature: 0.8
+  model_name: "gpt-4o" # More capable model
+  temperature: 1.0
   top_p: 0.95
-  max_tokens: 1000 # Reduced token count
+  max_tokens: 8192 # Increased token count
   # API key should be set via environment variable OPENAI_API_KEY

 # Path to the interestingness prompt YAML file
@@ -36,7 +36,7 @@ prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_prompt.yaml
 tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp)
 frame:
   evaluation_episodes_M: 1 # Reduced episodes count (M)
-  evaluation_timeout_seconds: 300 # Reduced timeout to 5 minutes
+  evaluation_timeout_seconds: 15 # 15-second timeout
   save_eval_visualizations: true # Flag to enable visualizations during evaluation

# Template/Specification File
@@ -54,7 +54,7 @@ evaluations_dir: "${hydra:runtime.output_dir}/evaluations"
 initial_state: "${hydra:runtime.cwd}/frame/configs/theory_building/initial_state/succ_zero_eq.yaml"

 # Iterations of the Funsearch sampling loop
-iterations: 8
+iterations: 2

# --- FunSearch Abstraction Settings ---
abstraction:
2 changes: 1 addition & 1 deletion frame/configs/models/gpt4o-mini.yaml
@@ -1,4 +1,4 @@
-name: gpt-4o-mini
+name: gpt-4o
 provider: openai
 temperature: 0.9
 max_tokens: 8192
8 changes: 6 additions & 2 deletions frame/configs/prompts/abstraction_prompt.yaml
@@ -16,8 +16,12 @@ abstraction_user_prompt_template: |
   Do not return any abstractions which are just the calculate_interestingness function themselves. Do not return any abstractions which do error-handling, or anything else unrelated to calculation of the interestingness.
   Keep the documentation concise: explain the arguments, and if the function outputs more than a float value (like a dictionary), explain what it returns in a condensed manner.
   Do not name any abstractions you make the same name as any of the abstractions already provided, and do not name it `calculate_interestingness`
-  Make sure the abstractions you generate are syntactically correct and do not call variables which don't exist.
+  Importantly, please do not make abstractions which seem to do things already handled by other abstractions! Do not "abstract" the abstractions themselves, and do not make an entire given program an abstraction; you can and should abstract smaller subroutines.
+  Make sure the abstractions you generate are syntactically correct and do not call variables which don't exist. The abstractions you choose do not have to be common to many of the given functions; they should just abstract out useful operations which are not already handled by other abstractions.
+  You can compose abstractions if you think it makes sense to do so, but importantly, make sure to keep the abstractions *general* - do not hard-code values when they could be passed as arguments, for instance.
+  Keep in mind that, in general, the examples of an entity will be tuples of ints. YOU MUST PROVIDE THE IMPLEMENTATION OF THE ABSTRACTIONS YOU RETURN!
+  Also, make the description you provide a clear description of what operation is carried out in the implementation; for example, if a measure involves a logarithm operation to implement a penalty, mention that in the description.

   Here are the functions you are given:
   {program_examples}
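To make the new instructions concrete, the kind of abstraction this prompt asks for is a small, general, documented helper rather than a whole scoring function. An illustrative example (hand-written here, not produced by the system):

```python
import math

def log_size_penalty(items: list, scale: float = 1.0) -> float:
    """Logarithmic penalty that grows with the number of items.

    Args:
        items: Collection to penalize, e.g. an entity's examples
            (each example being a tuple of ints).
        scale: Multiplier on the penalty; passed as an argument rather
            than hard-coded, keeping the abstraction general.

    Returns:
        scale * log(1 + len(items)) as a float.
    """
    return scale * math.log1p(len(items))
```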
26 changes: 16 additions & 10 deletions frame/configs/prompts/interestingness_prompt.yaml
@@ -36,7 +36,8 @@ one_shot_prompt:
   - You **MUST** respond with **only** the complete, syntactically correct Python code for the new function (`calculate_interestingness_vN`).
   - Include the `def calculate_interestingness_vN(...):` signature line and the function body. Add a concise docstring.
   - **DO NOT** include any introductory text, explanations, comments outside the function body, or usage examples in your response.
-  - If you use any of the primitives or abstractions, make sure you use them correctly. Provide the right inputs as described in the documentation given about the primitives!"
+  - If you use any of the primitives or abstractions, make sure you use them correctly. Provide the right inputs as described in the documentation given about the primitives!
+  - The descriptions of the primitives and abstractions indicate what arguments they take. Follow proper Python syntax. Watch out for potential division by zero errors."

   template: |
     def calculate_interestingness(entity_id: str, graph: KnowledgeGraph) -> float:
@@ -70,7 +71,7 @@ funsearch_system_prompt: |

   Your specific task is to **generate a new, potentially improved version** of the `calculate_interestingness` function, named `calculate_interestingness_vN`. You should **analyze all the example functions** provided in the user prompt (`_v0` to `_v(N-1)`) to understand different successful strategies and potentially combine or adapt their ideas.

-  The function you write will receive `entity_id` (string) and `graph` (a `KnowledgeGraph` object) as input. You can use the following methods on the `graph` object to get information about the entity (`entity_id`) or the graph itself:
+  The function you write will receive `entity_id` (string) and `graph` (a `KnowledgeGraph` object) as input. You can use the following methods on the `graph` object to get information about the entity (`entity_id`) or the graph itself; each description explains what arguments the method takes:
   {primitives_section}

   {abstraction_section}
@@ -84,7 +85,12 @@
   - Include the `def calculate_interestingness_vN(...):` signature line and the function body. Add a concise docstring.
   - **DO NOT** include any introductory text, explanations, comments outside the function body, or usage examples in your response.
   - Enclose the entire function definition within a single markdown code block like this:
-  - If you use any of the primitives or abstractions, make sure you use them correctly. Provide the right inputs as described in the documentation above!
+  - If you use any of the primitives or abstractions, make sure you use them correctly by supplying the proper arguments.
+  - Try not to rely on the abstractions alone - use them in a compositional way, where you also implement some of the logic yourself (passing interesting arguments to the abstractions counts).
+  - Try not to copy the examples exactly, but rather use them as inspiration to create a new, better function that *can* be similar.
+  - You do not have to use all primitives, and you do not have to make extremely complex functions if you don't think it necessary.
+  - Watch out for potential division by zero errors.

   ```python
   def calculate_interestingness_vN(entity_id: str, graph: KnowledgeGraph) -> float:
       """A new function version inspired by provided examples."""
@@ -134,13 +140,13 @@ primitive_categories:
     primitives:
       - name: "get_entity_node_type"
        return_type: "float"
-        description: "(entity_id, graph): Returns numeric type (Concept=1, Conj=2, Thm=3)."
+        description: "(entity_id, graph): Returns 'Concept', 'Conjecture', or 'Theorem' depending on the type of the entity."
       - name: "get_concept_category"
         return_type: "float"
-        description: "(entity_id, graph): Returns numeric concept category (Predicate=1, Func=2,...)."
+        description: "(entity_id, graph): Returns 'Predicate', 'Function', or 'Constant' depending on the type of the entity."
       - name: "get_input_arity"
         return_type: "int"
-        description: "(entity_id, graph): Returns input arity."
+        description: "(entity_id, graph): Returns input arity of the entity."
       - name: "get_num_component_types"
         return_type: "int"
         description: "(entity_id, graph): Returns number of component types in examples."
@@ -149,11 +155,11 @@
     description: "These primitives extract information about examples and non-examples of the entity."
     primitives:
       - name: "get_examples"
-        return_type: "List[Example]"
-        description: "(entity_id, graph): Returns list of positive examples."
+        return_type: "List[Tuple[Int]]"
+        description: "(entity_id, graph): Returns list of positive examples; each example is a tuple of ints."
       - name: "get_nonexamples"
-        return_type: "List[Example]"
-        description: "(entity_id, graph): Returns list of negative examples."
+        return_type: "List[Tuple[Int]]"
+        description: "(entity_id, graph): Returns list of negative examples; each example is a tuple of ints."

   "Construction Step Primitives":
     description: "These primitives extract information about how the entity was constructed."
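Putting the documented primitives together, a function of the kind these prompts solicit might look like the sketch below. It is illustrative only: it assumes the primitives are exposed as free functions taking `(entity_id, graph)`, as their descriptions indicate, and includes the division-by-zero guard the prompt now insists on.

```python
def calculate_interestingness_vN(entity_id: str, graph: KnowledgeGraph) -> float:
    """Scores an entity by example support and signature simplicity."""
    examples = get_examples(entity_id, graph)        # List[Tuple[int, ...]]
    nonexamples = get_nonexamples(entity_id, graph)  # List[Tuple[int, ...]]
    total = len(examples) + len(nonexamples)
    if total == 0:
        return 0.0  # guard against division by zero
    support = len(examples) / total                  # fraction of positive examples
    simplicity = 1.0 / (1.0 + get_input_arity(entity_id, graph))
    return 0.7 * support + 0.3 * simplicity
```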
8 changes: 4 additions & 4 deletions frame/configs/succ_zero_base.yaml
@@ -4,7 +4,7 @@
 defaults:
   - theory_building/production_rules: default # Use all available rules
   - theory_building/interestingness: default # Use default interestingness measures
-  - theory_building/initial_state: succ_zero # Default initial state
+  - theory_building/initial_state: succ_zero_eq # Default initial state
   # Note(George; 4/25): This is the initial state that will be used, can be subbed out for: succ_zero, succ_zero_eq and arithmetic_basic
   - interestingness: default # Use default HR interestingness functions
   - _self_
@@ -15,9 +15,9 @@ experiment:
   description: "Discover mathematical concepts starting from just successor and zero"
   num_episodes: 1
   seed: 0
-  max_steps: 80 # Default max steps
-  episode_timeout_seconds: 60 # Timeout for the main process waiting for a worker episode result
-  num_workers: 1 # Number of parallel worker processes
+  max_steps: 1000 # Default max steps
+  episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result
+  num_workers: 64 # Number of parallel worker processes

# Timeout settings
timeouts:
16 changes: 8 additions & 8 deletions frame/configs/succ_zero_generate_interestingness_funsearch.yaml
@@ -24,11 +24,11 @@ initial_state:
 experiment:
   name: "succ_zero_multi_generated_interestingness_eval"
   description: "Generate N interestingness functions and evaluate each M times, starting from successor and zero."
-  max_steps: 80
-  num_episodes: 20
-  num_workers: 8
-  seed: ${oc.env:RANDOM_SEED,12345}
-  episode_timeout_seconds: 15 # Timeout for the main process waiting for a worker episode result
+  max_steps: 1000
+  num_episodes: 64
+  num_workers: 64
+  seed: 0 #${oc.env:RANDOM_SEED,12345}
+  episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result

# --- New settings for N x M Evaluation ---
evaluate_multiple_interestingness: True
@@ -54,13 +54,13 @@ policy:
   type: interestingness_guided
   params:
     concept_selection: INTERESTINGNESS
-    action_selection: RULE_BASED_RANDOM # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified.
-    top_k_concepts: 5
+    action_selection: SIMULATE_AND_SCORE
+    top_k_concepts: 8
     temperature: 1.0
     generate_interestingness: True
     interestingness_function_path: ""
     action_selection_params:
-      simulation_limit: 20
+      simulation_limit: 3

# Production Rules configuration (Explicitly added for manual loading via OmegaConf.load)
production_rules:
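The N x M scheme named in the experiment description (generate N interestingness functions, evaluate each over M episodes) reduces to a loop like the following. The generation and episode helpers here are toy stand-ins, not frame's actual entry points:

```python
import random
import statistics

def generate_interestingness_function(version: int):
    """Toy stand-in for the LLM generation step."""
    return lambda: random.random() * (version + 1)

def run_episode(fn, seed: int) -> float:
    """Toy stand-in for one TheoryBuilder episode scored by fn."""
    random.seed(seed)
    return fn()

def evaluate_n_by_m(n_functions: int, m_episodes: int) -> dict:
    """Score each of N functions as its mean over M episodes."""
    return {
        f"calculate_interestingness_v{n}": statistics.mean(
            run_episode(generate_interestingness_function(n), seed=m)
            for m in range(m_episodes)
        )
        for n in range(n_functions)
    }

print(evaluate_n_by_m(n_functions=3, m_episodes=4))
```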
@@ -29,7 +29,7 @@ policy:
   params:
     concept_selection: INTERESTINGNESS
     action_selection: SIMULATE_AND_SCORE # RULE_BASED_RANDOM # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified.
-    top_k_concepts: 5
+    top_k_concepts: 8
     temperature: 0.5
     generate_interestingness: True
     interestingness_function_path: ""
2 changes: 1 addition & 1 deletion frame/configs/succ_zero_standard_experiment.yaml
@@ -20,7 +20,7 @@ policy:
   type: interestingness_guided
   params:
     concept_selection: INTERESTINGNESS
-    top_k_concepts: 5
+    top_k_concepts: 8
     temperature: 0.5
     simulation_limit: 3
     action_selection: SIMULATE_AND_SCORE # RULE_BASED_RANDOM
@@ -9,6 +9,7 @@ import_from: "frame.knowledge_base.initial_concepts"
 concepts:
   - "create_zero_concept"
   - "create_one_concept"
+  - "create_two_concept"
   - "create_addition_concept"
   - "create_multiplication_concept"
   - "create_divides_concept"
33 changes: 28 additions & 5 deletions frame/funsearch/implementation/abstraction_library.py
@@ -2,6 +2,8 @@
 import dataclasses
 from typing import Optional, List, Dict
 import logging
+import textwrap
+import random  # Used to pick which abstractions get their source inlined in the prompt

 logger = logging.getLogger(__name__)
@@ -52,11 +54,32 @@ def format_for_sampler_prompt(self) -> str:
             return " (No abstraction functions available in this island yet.)"

         prompt_lines = ["**Available Abstraction Functions:**"]
-        # Sort alphabetically for consistent prompt appearance
-        sorted_abstractions = sorted(self._abstractions.values(), key=lambda x: x.name)
-        for abs_obj in sorted_abstractions:
-            # Format consistently with DSL primitives if possible
-            prompt_lines.append(f"- `{abs_obj.name}{abs_obj.signature}`: {abs_obj.description}")
+
+        abstractions_list = list(self._abstractions.values())
+        num_abstractions = len(abstractions_list)
+
+        # Randomly pick up to 2 abstractions whose full source will be shown
+        indices_to_show_code = []
+        if num_abstractions > 0:
+            k = min(num_abstractions, 2)
+            indices_to_show_code = random.sample(range(num_abstractions), k)
+
+        # Sort alphabetically for consistent prompt appearance (after the random
+        # selection); keep original indices so the choice above still applies
+        sorted_abstractions_with_original_indices = sorted(
+            [(i, abs_obj) for i, abs_obj in enumerate(abstractions_list)],
+            key=lambda x: x[1].name
+        )
+
+        for original_idx, abs_obj in sorted_abstractions_with_original_indices:
+            prompt_lines.append(f"- `{abs_obj.name}{abs_obj.signature}`: {abs_obj.description}")
+            if original_idx in indices_to_show_code:
+                prompt_lines.append("  ```python")
+                prompt_lines.append(textwrap.indent(abs_obj.code, '  '))
+                prompt_lines.append("  ```")
         return "\n".join(prompt_lines)

def generate_definitions_for_sandbox(self) -> str:
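A self-contained rerun of the new selection logic, useful for sanity-checking the prompt formatting (toy data in place of frame's Abstraction objects):

```python
import random
import textwrap

abstractions = {
    "a_helper": "def a_helper(xs):\n    return len(xs)",
    "b_helper": "def b_helper(x, y):\n    return x / y if y else 0.0",
}

# Mirror of format_for_sampler_prompt: list every abstraction,
# but inline full source for at most 2 randomly chosen entries.
names = list(abstractions)
show_code = set(random.sample(range(len(names)), min(len(names), 2)))

lines = ["**Available Abstraction Functions:**"]
for i, name in sorted(enumerate(names), key=lambda p: p[1]):
    lines.append(f"- `{name}`")
    if i in show_code:
        lines.append(textwrap.indent(abstractions[name], "    "))
print("\n".join(lines))
```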
4 changes: 2 additions & 2 deletions frame/funsearch/main.py
@@ -133,7 +133,7 @@ def __init__(self, cfg: DictConfig):
         # --- Safeguard for excessive parallelism ---
         # Access TheoryBuilder's num_workers from the base config loaded by the sandbox
         tb_base_cfg = self.sandbox.base_cfg
-        tb_num_workers = omegaconf.OmegaConf.select(tb_base_cfg, 'experiment.num_workers', default=1)
+        tb_num_workers = omegaconf.OmegaConf.select(tb_base_cfg, 'experiment.num_workers', default=64)

         # Access num_islands from FunSearch config (after database setup)
         num_islands = self.cfg.programs_database.num_islands
@@ -145,7 +145,7 @@
             f"FunSearch programs_database.num_islands ({num_islands}) = {total_potential_tb_workers}")

         # Exceeding WORKER_LIMIT raises an error.
-        WORKER_LIMIT = 64
+        WORKER_LIMIT = 256
         if total_potential_tb_workers > WORKER_LIMIT:
             error_msg = (
                 f"Potential concurrent TheoryBuilder worker count ({total_potential_tb_workers}) meets or exceeds limit of {WORKER_LIMIT}. "
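The log format string above implies the safeguard multiplies TheoryBuilder's per-run `num_workers` by FunSearch's `num_islands`. A standalone sketch of that check (an interpretation of the snippet, not the verbatim code):

```python
def check_worker_budget(tb_num_workers: int, num_islands: int, limit: int = 256) -> int:
    """Raise if the worst-case concurrent TheoryBuilder worker count exceeds the limit."""
    total = tb_num_workers * num_islands
    if total > limit:
        raise ValueError(
            f"Potential concurrent TheoryBuilder worker count ({total}) "
            f"exceeds limit of {limit}."
        )
    return total

check_worker_budget(tb_num_workers=64, num_islands=4)    # 256 passes: the check is strictly greater-than
# check_worker_budget(tb_num_workers=64, num_islands=8)  # 512 raises ValueError
```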
10 changes: 5 additions & 5 deletions frame/interestingness/learning/algorithms/one_shot_llm.py
@@ -129,9 +129,9 @@ def _load_model_config(self, config_path: str) -> Any:
             # Create a default config
             class DefaultConfig:
                 def __init__(self):
-                    self.name = "gpt-4o-mini"
-                    self.temperature = 0.7
-                    self.max_tokens = 4000
+                    self.name = "gpt-4o"
+                    self.temperature = 1.0
+                    self.max_tokens = 4096
                     self.system_prompt = "You are a mathematical assistant that specializes in creating interestingness functions."

             return DefaultConfig()
@@ -711,8 +711,8 @@ async def main():

     # Create a generator
     generator = OneShotLLMGenerator(
-        model="gpt-4o-mini",
-        temperature=0.7,
+        model="gpt-4o",
+        temperature=1.0,
         logger=logger
     )

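For context, `_load_model_config` appears to fall back to this built-in default when the YAML config cannot be loaded; the standalone sketch below mirrors that pattern (assumed flow, with PyYAML standing in for the actual loader):

```python
from types import SimpleNamespace
import yaml  # PyYAML

def load_model_config(config_path: str) -> SimpleNamespace:
    """Load a model config from YAML, else return built-in defaults."""
    try:
        with open(config_path) as f:
            return SimpleNamespace(**yaml.safe_load(f))
    except (OSError, yaml.YAMLError):
        return SimpleNamespace(
            name="gpt-4o",
            temperature=1.0,
            max_tokens=4096,
            system_prompt=(
                "You are a mathematical assistant that specializes "
                "in creating interestingness functions."
            ),
        )
```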