diff --git a/frame/configs/funsearch-mid.yaml b/frame/configs/funsearch-mid.yaml index d174000..30b61d2 100644 --- a/frame/configs/funsearch-mid.yaml +++ b/frame/configs/funsearch-mid.yaml @@ -15,18 +15,18 @@ defaults: programs_database: functions_per_prompt: 2 - num_islands: 2 - reset_period: 3600 # 1 hour in seconds - cluster_sampling_temperature_init: 0.1 - cluster_sampling_temperature_period: 30000 + num_islands: 4 + reset_period: 360000 # 100 hours in seconds + cluster_sampling_temperature_init: 0.01 + cluster_sampling_temperature_period: 1000 samples_per_prompt: 2 num_samplers: 2 # Single-threaded setup num_evaluators: 2 # Single-threaded setup # LLM Config (For frame.tools.llm_caller) llm: - model_name: "gpt-4o-mini" # Less expensive model - temperature: 0.8 + model_name: "gpt-4o" # Upgraded from gpt-4o-mini + temperature: 1.0 top_p: 0.95 max_tokens: 1000 # Reduced token count # API key should be set via environment variable OPENAI_API_KEY @@ -39,8 +39,8 @@ prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_pro # tb_config_path: ??? # REQUIRED: Set via command line: python -m frame.funsearch.main tb_config_path=/path/to/your/tb_config.yaml tb_config_path: "${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp) frame: - evaluation_episodes_M: 8 # Reduced episodes count (M) - evaluation_timeout_seconds: 30 # Reduced timeout to 5 minutes + evaluation_episodes_M: 64 # Episodes count (M) + evaluation_timeout_seconds: 120 # 2 minute timeout + save_eval_visualizations: true # Flag to enable visualizations during evaluation # Template/Specification File @@ -60,7 +60,7 @@ iterations: 128 # --- FunSearch Abstraction Settings --- abstraction: enabled: true # Master switch: set to false to disable the entire abstraction mechanism - frequency: 5 # Run the abstraction step every N main FunSearch iterations (low for testing). 
+ frequency: 4 # Run the abstraction step every N main FunSearch iterations (low for testing). programs_to_sample: 3 # How many top programs to analyze (low for testing). max_abstractions_per_step: 2 # Target new abstractions per step (low for testing). # Optional LLM override block for abstraction task @@ -68,7 +68,7 @@ abstraction: model_name: null # Default: null (use main llm config) temperature: null # Default: null (use main llm config) top_p: null # Default: null (use main llm config) - max_tokens: 4096 # Can override max tokens if needed + max_tokens: 8192 # Can override max tokens if needed prompt_yaml_path: "${hydra:runtime.cwd}/frame/configs/prompts/abstraction_prompt.yaml" # Specific instructions for generating abstractions # Hydra settings diff --git a/frame/configs/funsearch-low.yaml b/frame/configs/funsearch-test.yaml similarity index 93% rename from frame/configs/funsearch-low.yaml rename to frame/configs/funsearch-test.yaml index d3ea648..91c10fb 100644 --- a/frame/configs/funsearch-low.yaml +++ b/frame/configs/funsearch-test.yaml @@ -10,10 +10,10 @@ defaults: - _self_ programs_database: - functions_per_prompt: 1 + functions_per_prompt: 2 num_islands: 1 reset_period: 3600 # 1 hour in seconds - cluster_sampling_temperature_init: 0.1 + cluster_sampling_temperature_init: 0.01 cluster_sampling_temperature_period: 30000 samples_per_prompt: 2 num_samplers: 1 # Single-threaded setup @@ -21,10 +21,10 @@ num_evaluators: 1 # Single-threaded setup # LLM Config (For frame.tools.llm_caller) llm: - model_name: "gpt-4o-mini" # Less expensive model - temperature: 0.8 + model_name: "gpt-4o" # Less expensive model + temperature: 1.0 top_p: 0.95 - max_tokens: 1000 # Reduced token count + max_tokens: 8192 # Reduced token count # API key should be set via environment variable OPENAI_API_KEY # Path to the interestingness prompt YAML file @@ -36,7 +36,7 @@ prompt_yaml_path: ${hydra:runtime.cwd}/frame/configs/prompts/interestingness_pro tb_config_path: 
"${hydra:runtime.cwd}/frame/configs/succ_zero_generate_interestingness_funsearch.yaml" # Provide the default (which is the generate_interestingness exp) frame: evaluation_episodes_M: 1 # Reduced episodes count (M) - evaluation_timeout_seconds: 300 # Reduced timeout to 5 minutes + evaluation_timeout_seconds: 15 # Reduced timeout to 5 minutes save_eval_visualizations: true # Flag to enable visualizations during evaluation # Template/Specification File @@ -54,7 +54,7 @@ evaluations_dir: "${hydra:runtime.output_dir}/evaluations" initial_state: "${hydra:runtime.cwd}/frame/configs/theory_building/initial_state/succ_zero_eq.yaml" # Iterations of the Funsearch sampling loop -iterations: 8 +iterations: 2 # --- FunSearch Abstraction Settings --- abstraction: diff --git a/frame/configs/models/gpt4o-mini.yaml b/frame/configs/models/gpt4o-mini.yaml index 18dcbb8..f1a779c 100644 --- a/frame/configs/models/gpt4o-mini.yaml +++ b/frame/configs/models/gpt4o-mini.yaml @@ -1,4 +1,4 @@ -name: gpt-4o-mini +name: gpt-4o provider: openai temperature: 0.9 max_tokens: 8192 diff --git a/frame/configs/prompts/abstraction_prompt.yaml b/frame/configs/prompts/abstraction_prompt.yaml index 9e7ee4e..aa884f2 100644 --- a/frame/configs/prompts/abstraction_prompt.yaml +++ b/frame/configs/prompts/abstraction_prompt.yaml @@ -16,8 +16,12 @@ abstraction_user_prompt_template: | Do not return any abstractions which are just the calculate_interestingness function themselves. Do not return any abstractions which do error-handling, or anything else unrelated to calculation ofthe interestingness. Keep the documentation concise, explain the arguments and if the function outputs more than a float value (like a dictionary), explain what it returns in a condensed manner. 
Do not name any abstractions you make the same name as any of the abstractions already provided, and do not name it `calculate_interestingness` - Make sure the abstractions you generate are syntactically correct and do not call variables which don't exist. - + Importantly, please do not make abstractions which seem to do things already handled by other abstractions! Do not "abstract" the abstractions themselves, and do not make an entire given program an abstraction, you can and should do smaller subroutines. + Make sure the abstractions you generate are syntactically correct and do not call variables which don't exist. The abstractions you choose do not have to be common to many of the given functions, they should just abstract out useful operations which are not already handled by other abstractions. + You can compose abstractions if you think it makes sense to do so, but importantly, make sure to keep the abstractions *general* - do not hard-code values when they could be passed as arguments, for instance. + Keep in mind the examples in general of an entity will be a tuple of ints. YOU MUST PROVIDE THE IMPLEMENTATION OF THE ABSTRACTIONS YOU RETURN! + Also, make the description you provide a clear description of what operation is carried out in the implementation, for example if a measure involves a logarithm operation to implement a penalty, mention that in the description. + Here are the functions you are given: {program_examples} diff --git a/frame/configs/prompts/interestingness_prompt.yaml b/frame/configs/prompts/interestingness_prompt.yaml index d532d0f..d6e7d39 100644 --- a/frame/configs/prompts/interestingness_prompt.yaml +++ b/frame/configs/prompts/interestingness_prompt.yaml @@ -36,7 +36,8 @@ one_shot_prompt: - You **MUST** respond with **only** the complete, syntactically correct Python code for the new function (`calculate_interestingness_vN`). - Include the `def calculate_interestingness_vN(...):` signature line and the function body. 
Add a concise docstring. - **DO NOT** include any introductory text, explanations, comments outside the function body, or usage examples in your response. - - If you use any of the primitives or abstractions, make sure you use them correctly. Provide the right inputs as described in the documentation given about the primitives!" + - If you use any of the primitives or abstractions, make sure you use them correctly. Provide the right inputs as described in the documentation given about the primitives! + - The descriptions of the primitives and abstractions indicate what arguments they take. Follow proper Python syntax. Watch out for potential division by zero errors." template: | def calculate_interestingness(entity_id: str, graph: KnowledgeGraph) -> float: @@ -70,7 +71,7 @@ funsearch_system_prompt: | Your specific task is to **generate a new, potentially improved version** of the `calculate_interestingness` function, named `calculate_interestingness_vN`. You should **analyze all the example functions** provided in the user prompt (`_v0` to `_v(N-1)`) to understand different successful strategies and potentially combine or adapt their ideas. - The function you write will receive `entity_id` (string) and `graph` (a `KnowledgeGraph` object) as input. You can use the following methods on the `graph` object to get information about the entity (`entity_id`) or the graph itself: + The function you write will receive `entity_id` (string) and `graph` (a `KnowledgeGraph` object) as input. You can use the following methods on the `graph` object to get information about the entity (`entity_id`) or the graph itself, the description explains what arguments it takes: {primitives_section} {abstraction_section} @@ -84,7 +85,12 @@ funsearch_system_prompt: | - Include the `def calculate_interestingness_vN(...):` signature line and the function body. Add a concise docstring. 
- **DO NOT** include any introductory text, explanations, comments outside the function body, or usage examples in your response. - Enclose the entire function definition within a single markdown code block like this: - - If you use any of the primitives or abstractions, make sure you use them correctly. Provide the right inputs as described in the documentation above! + - If you use any of the primitives or abstractions, make sure you use them correctly by supplying the proper arguments. + - Try not to rely on the abstractions alone - use them in a compositional way, where you also implement some of the logic yourself (passing interesting arguments to the abstractions counts). + - Try not to copy the examples exactly, but rather use them as inspiration to create a new, better, function that *can* be similar. + - You do not have to use all primitives, and you do not have to make extremely complex functions if you don't think it necessary. + - Watch out for potential division by zero errors. + ```python def calculate_interestingness_vN(entity_id: str, graph: KnowledgeGraph) -> float: """A new function version inspired by provided examples.""" @@ -134,13 +140,13 @@ primitive_categories: primitives: - name: "get_entity_node_type" return_type: "float" - description: "(entity_id, graph): Returns numeric type (Concept=1, Conj=2, Thm=3)." + description: "(entity_id, graph): Returns 'Concept', 'Conjecture', or 'Theorem' depending on the type of the entity." - name: "get_concept_category" return_type: "float" - description: "(entity_id, graph): Returns numeric concept category (Predicate=1, Func=2,...)." + description: "(entity_id, graph): Returns 'Predicate', 'Function', 'Relation', or 'Constant' depending on the type of the entity." - name: "get_input_arity" return_type: "int" - description: "(entity_id, graph): Returns input arity." + description: "(entity_id, graph): Returns input arity of the entity." 
- name: "get_num_component_types" return_type: "int" description: "(entity_id, graph): Returns number of component types in examples." @@ -149,11 +155,11 @@ primitive_categories: description: "These primitives extract information about examples and non-examples of the entity." primitives: - name: "get_examples" - return_type: "List[Example]" - description: "(entity_id, graph): Returns list of positive examples." + return_type: "List[Tuple[Int]]" + description: "(entity_id, graph): Returns list of positive examples, each example is a tuple of ints." - name: "get_nonexamples" - return_type: "List[Example]" - description: "(entity_id, graph): Returns list of negative examples." + return_type: "List[Tuple[Int]]" + description: "(entity_id, graph): Returns list of negative examples, each example is a tuple of ints." "Construction Step Primitives": description: "These primitives extract information about how the entity was constructed." diff --git a/frame/configs/succ_zero_base.yaml b/frame/configs/succ_zero_base.yaml index 0cd46af..3d031c7 100644 --- a/frame/configs/succ_zero_base.yaml +++ b/frame/configs/succ_zero_base.yaml @@ -4,7 +4,7 @@ defaults: - theory_building/production_rules: default # Use all available rules - theory_building/interestingness: default # Use default interestingness measures - - theory_building/initial_state: succ_zero # Default initial state + - theory_building/initial_state: succ_zero_eq # Default initial state # Note(George; 4/25): This is the initial state that will be used, can be subbed out for: succ_zero, succ_zero_eq and arithmetic_basic - interestingness: default # Use default HR interestingness functions - _self_ @@ -15,9 +15,9 @@ experiment: description: "Discover mathematical concepts starting from just successor and zero" num_episodes: 1 seed: 0 - max_steps: 80 # Default max steps - episode_timeout_seconds: 60 # Timeout for the main process waiting for a worker episode result - num_workers: 1 # Number of parallel worker processes + 
max_steps: 1000 # Default max steps + episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result + num_workers: 64 # Number of parallel worker processes # Timeout settings timeouts: diff --git a/frame/configs/succ_zero_generate_interestingness_funsearch.yaml b/frame/configs/succ_zero_generate_interestingness_funsearch.yaml index ad47f2b..12bc3f1 100644 --- a/frame/configs/succ_zero_generate_interestingness_funsearch.yaml +++ b/frame/configs/succ_zero_generate_interestingness_funsearch.yaml @@ -24,11 +24,11 @@ initial_state: experiment: name: "succ_zero_multi_generated_interestingness_eval" description: "Generate N interestingness functions and evaluate each M times, starting from successor and zero." - max_steps: 80 - num_episodes: 20 - num_workers: 8 - seed: ${oc.env:RANDOM_SEED,12345} - episode_timeout_seconds: 15 # Timeout for the main process waiting for a worker episode result + max_steps: 1000 + num_episodes: 64 + num_workers: 64 + seed: 0 #${oc.env:RANDOM_SEED,12345} + episode_timeout_seconds: 120 # Timeout for the main process waiting for a worker episode result # --- New settings for N x M Evaluation --- evaluate_multiple_interestingness: True @@ -54,13 +54,13 @@ policy: type: interestingness_guided params: concept_selection: INTERESTINGNESS - action_selection: RULE_BASED_RANDOM # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified. - top_k_concepts: 5 + action_selection: SIMULATE_AND_SCORE # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified. 
+ top_k_concepts: 8 temperature: 1.0 generate_interestingness: True interestingness_function_path: "" action_selection_params: - simulation_limit: 20 + simulation_limit: 3 # Production Rules configuration (Explicitly added for manual loading via OmegaConf.load) production_rules: diff --git a/frame/configs/succ_zero_generated_interestingness_experiment.yaml b/frame/configs/succ_zero_generated_interestingness_experiment.yaml index b258a73..2d51de8 100644 --- a/frame/configs/succ_zero_generated_interestingness_experiment.yaml +++ b/frame/configs/succ_zero_generated_interestingness_experiment.yaml @@ -29,7 +29,7 @@ policy: params: concept_selection: INTERESTINGNESS action_selection: SIMULATE_AND_SCORE # RULE_BASED_RANDOM # TODO(George; 4/7): Change this back to SIMULATE_AND_SCORE when that implementation has been verified. - top_k_concepts: 5 + top_k_concepts: 8 temperature: 0.5 generate_interestingness: True interestingness_function_path: "" diff --git a/frame/configs/succ_zero_standard_experiment.yaml b/frame/configs/succ_zero_standard_experiment.yaml index d10ed9c..d750860 100644 --- a/frame/configs/succ_zero_standard_experiment.yaml +++ b/frame/configs/succ_zero_standard_experiment.yaml @@ -20,7 +20,7 @@ policy: type: interestingness_guided params: concept_selection: INTERESTINGNESS - top_k_concepts: 5 + top_k_concepts: 8 temperature: 0.5 simulation_limit: 3 action_selection: SIMULATE_AND_SCORE # RULE_BASED_RANDOM diff --git a/frame/configs/theory_building/initial_state/arithmetic_basic.yaml b/frame/configs/theory_building/initial_state/arithmetic_basic.yaml index 68f10dd..74a54ca 100644 --- a/frame/configs/theory_building/initial_state/arithmetic_basic.yaml +++ b/frame/configs/theory_building/initial_state/arithmetic_basic.yaml @@ -9,6 +9,7 @@ import_from: "frame.knowledge_base.initial_concepts" concepts: - "create_zero_concept" - "create_one_concept" + - "create_two_concept" - "create_addition_concept" - "create_multiplication_concept" - "create_divides_concept" 
diff --git a/frame/funsearch/implementation/abstraction_library.py b/frame/funsearch/implementation/abstraction_library.py index 63484fb..add676f 100644 --- a/frame/funsearch/implementation/abstraction_library.py +++ b/frame/funsearch/implementation/abstraction_library.py @@ -2,6 +2,8 @@ import dataclasses from typing import Optional, List, Dict import logging +import textwrap +import random # Add random import logger = logging.getLogger(__name__) @@ -52,11 +54,32 @@ def format_for_sampler_prompt(self) -> str: return " (No abstraction functions available in this island yet.)" prompt_lines = ["**Available Abstraction Functions:**"] - # Sort alphabetically for consistent prompt appearance - sorted_abstractions = sorted(self._abstractions.values(), key=lambda x: x.name) - for abs_obj in sorted_abstractions: - # Format consistently with DSL primitives if possible - prompt_lines.append(f"- `{abs_obj.name}{abs_obj.signature}`: {abs_obj.description}") + + abstractions_list = list(self._abstractions.values()) # Get a list of abstractions + num_abstractions = len(abstractions_list) + + indices_to_show_code = [] + if num_abstractions > 0: + # Determine how many to pick (up to 2) + k = min(num_abstractions, 2) + indices_to_show_code = random.sample(range(num_abstractions), k) + + # Sort alphabetically for consistent prompt appearance (after random selection) + # We sort the original list, then iterate with original indices + sorted_abstractions_with_original_indices = sorted( + [(i, abs_obj) for i, abs_obj in enumerate(abstractions_list)], + key=lambda x: x[1].name + ) + + for original_idx, abs_obj in sorted_abstractions_with_original_indices: + base_info = f"- `{abs_obj.name}{abs_obj.signature}`: {abs_obj.description}" + if original_idx in indices_to_show_code: + prompt_lines.append(base_info) + prompt_lines.append(" ```python") + prompt_lines.append(textwrap.indent(abs_obj.code, ' ')) + prompt_lines.append(" ```") + else: + prompt_lines.append(base_info) return 
"\n".join(prompt_lines) def generate_definitions_for_sandbox(self) -> str: diff --git a/frame/funsearch/main.py b/frame/funsearch/main.py index 13ad831..aaff2f4 100644 --- a/frame/funsearch/main.py +++ b/frame/funsearch/main.py @@ -133,7 +133,7 @@ def __init__(self, cfg: DictConfig): # --- Safeguard for excessive parallelism --- # Access TheoryBuilder's num_workers from the base config loaded by the sandbox tb_base_cfg = self.sandbox.base_cfg - tb_num_workers = omegaconf.OmegaConf.select(tb_base_cfg, 'experiment.num_workers', default=1) + tb_num_workers = omegaconf.OmegaConf.select(tb_base_cfg, 'experiment.num_workers', default=64) # Access num_islands from FunSearch config (after database setup) num_islands = self.cfg.programs_database.num_islands @@ -145,7 +145,7 @@ def __init__(self, cfg: DictConfig): f"FunSearch programs_database.num_islands ({num_islands}) = {total_potential_tb_workers}") # The limit is < 64, so >= 64 should raise an error. - WORKER_LIMIT = 64 + WORKER_LIMIT = 256 if total_potential_tb_workers > WORKER_LIMIT: error_msg = ( f"Potential concurrent TheoryBuilder worker count ({total_potential_tb_workers}) meets or exceeds limit of {WORKER_LIMIT}. " diff --git a/frame/interestingness/learning/algorithms/one_shot_llm.py b/frame/interestingness/learning/algorithms/one_shot_llm.py index ff88ade..28c1f2b 100644 --- a/frame/interestingness/learning/algorithms/one_shot_llm.py +++ b/frame/interestingness/learning/algorithms/one_shot_llm.py @@ -129,9 +129,9 @@ def _load_model_config(self, config_path: str) -> Any: # Create a default config class DefaultConfig: def __init__(self): - self.name = "gpt-4o-mini" - self.temperature = 0.7 - self.max_tokens = 4000 + self.name = "gpt-4o" + self.temperature = 1.0 + self.max_tokens = 4096 self.system_prompt = "You are a mathematical assistant that specializes in creating interestingness functions." 
return DefaultConfig() @@ -711,8 +711,8 @@ async def main(): # Create a generator generator = OneShotLLMGenerator( - model="gpt-4o-mini", - temperature=0.7, + model="gpt-4o", + temperature=1.0, logger=logger ) diff --git a/frame/interestingness/learning/baselines/hr_comprehensbility.py b/frame/interestingness/learning/baselines/hr_comprehensibility.py similarity index 100% rename from frame/interestingness/learning/baselines/hr_comprehensbility.py rename to frame/interestingness/learning/baselines/hr_comprehensibility.py diff --git a/frame/interestingness/learning/dsl_primitives.py b/frame/interestingness/learning/dsl_primitives.py index 2211cd3..3b8a04b 100644 --- a/frame/interestingness/learning/dsl_primitives.py +++ b/frame/interestingness/learning/dsl_primitives.py @@ -170,13 +170,39 @@ def get_entity_step_age(entity_id: str, graph: KnowledgeGraph) -> int: raise ValueError(f"Error getting entity_step_age for entity {entity_id}") # return 0 -def get_num_concepts(graph: KnowledgeGraph) -> int: - """Returns the number of concepts in the graph.""" - return len([node for node in graph.nodes if graph.nodes[node]['node_type'] == NodeType.CONCEPT]) +def get_num_concepts(entity_id = None, graph: KnowledgeGraph = None) -> int: + """Returns the number of concepts in the graph. + + Args: + entity_id: Optional entity ID. If provided, only counts concepts in the entity's subgraph. + If None, counts all concepts in the graph. 
+ graph: The knowledge graph to analyze + """ + if graph is None: + if entity_id is None: + raise ValueError("Either entity_id or graph must be provided") + else: + graph = entity_id # passed the wrong way by GPT + if graph is not None: + # Original behavior - count all concepts in graph + return len([node for node in graph.nodes if graph.nodes[node]['node_type'] == NodeType.CONCEPT]) -def get_num_conjectures(graph: KnowledgeGraph) -> int: - """Returns the number of conjectures in the graph.""" - return len([node for node in graph.nodes if graph.nodes[node]['node_type'] == NodeType.CONJECTURE]) +def get_num_conjectures(entity_id = None, graph: KnowledgeGraph = None) -> int: + """Returns the number of conjectures in the graph. + + Args: + entity_id: Optional entity ID. If provided, only counts conjectures in the entity's subgraph. + If None, counts all conjectures in the graph. + graph: The knowledge graph to analyze + """ + if graph is None: + if entity_id is None: + raise ValueError("Either entity_id or graph must be provided") + else: + graph = entity_id # passed the wrong way by GPT + if graph is not None: + # Original behavior - count all conjectures in graph + return len([node for node in graph.nodes if graph.nodes[node]['node_type'] == NodeType.CONJECTURE]) # --- Entity Attribute Primitives --- @@ -185,10 +211,10 @@ def get_entity_node_type(entity_id: str, graph: KnowledgeGraph) -> float: """Returns a numeric representation of the node type (e.g., Concept=1, Conj=2, Thm=3).""" try: node_type = graph.nodes[entity_id]['node_type'] - if node_type == NodeType.CONCEPT: return 1.0 - if node_type == NodeType.CONJECTURE: return 2.0 - if node_type == NodeType.THEOREM: return 3.0 - return 0.0 + if node_type == NodeType.CONCEPT: return "Concept" + if node_type == NodeType.CONJECTURE: return "Conjecture" + if node_type == NodeType.THEOREM: return "Theorem" + return "Unknown" except Exception: raise ValueError(f"Error getting entity_node_type for entity 
{entity_id}") # return 0.0 @@ -199,11 +225,11 @@ def get_concept_category(entity_id: str, graph: KnowledgeGraph) -> float: entity = graph.nodes[entity_id]['entity'] if hasattr(entity, 'example_structure') and entity.example_structure: concept_type = entity.example_structure.concept_type - if concept_type == ConceptType.PREDICATE: return 1.0 - if concept_type == ConceptType.FUNCTION: return 2.0 - if concept_type == ConceptType.RELATION: return 3.0 - if concept_type == ConceptType.CONSTANT: return 4.0 - return 0.0 + if concept_type == ConceptType.PREDICATE: return "Predicate" + if concept_type == ConceptType.FUNCTION: return "Function" + if concept_type == ConceptType.RELATION: return "Relation" + if concept_type == ConceptType.CONSTANT: return "Constant" + return "Unknown" except Exception: raise ValueError(f"Error getting concept_category for entity {entity_id}") # return 0.0 @@ -237,7 +263,7 @@ def get_examples(entity_id: str, graph: KnowledgeGraph) -> List[Example]: try: entity = graph.nodes[entity_id]['entity'] if hasattr(entity, 'examples'): - return entity.examples.get_examples() + return [example.value for example in entity.examples.get_examples()] return [] except Exception: raise ValueError(f"Error getting examples for entity {entity_id}") @@ -248,7 +274,7 @@ def get_nonexamples(entity_id: str, graph: KnowledgeGraph) -> List[Example]: try: entity = graph.nodes[entity_id]['entity'] if hasattr(entity, 'examples'): - return entity.examples.get_nonexamples() + return [example.value for example in entity.examples.get_nonexamples()] return [] except Exception: raise ValueError(f"Error getting nonexamples for entity {entity_id}") diff --git a/frame/knowledge_base/initial_concepts.py b/frame/knowledge_base/initial_concepts.py index 597f354..7fd8daf 100644 --- a/frame/knowledge_base/initial_concepts.py +++ b/frame/knowledge_base/initial_concepts.py @@ -111,6 +111,37 @@ def create_one_concept(): one.add_nonexample((4,)) return one +def create_two_concept(): + 
"""Factory function to create the two concept.""" + two = Concept( + name="two", + description="The constant two", + symbolic_definition=lambda: Succ(Succ(Zero())), + computational_implementation=lambda: 2, + example_structure=ExampleStructure( + concept_type=ConceptType.CONSTANT, + component_types=(ExampleType.NUMERIC,), + input_arity=0, + ), + can_add_examples=True, + can_add_nonexamples=True, + z3_translation=lambda: Z3Template( + """ + params 0; + bounded params 0; + ReturnExpr 2; + ReturnPred None; + """, + ) + ) + two.add_example((2,)) + two.add_nonexample((0,)) + two.add_nonexample((1,)) + two.add_nonexample((3,)) + two.add_nonexample((4,)) + return two + + def create_addition_concept(): """Factory function to create the addition concept.""" diff --git a/frame/tools/llm_caller.py b/frame/tools/llm_caller.py index 9bc1924..7659d2d 100644 --- a/frame/tools/llm_caller.py +++ b/frame/tools/llm_caller.py @@ -80,8 +80,8 @@ async def call_openai( self, conversation_id: str, prompt: str, - model: str = "gpt-4o-mini", - temperature: float = 0.7, + model: str = "gpt-4o", + temperature: float = 1.0, max_tokens: Optional[int] = None, system_prompt: Optional[str] = None, n: int = 1, @@ -167,8 +167,8 @@ async def call_model( final_params = {} # Start with base defaults or passed arguments - final_params['model'] = model if model is not None else "gpt-4o-mini" # Default model - final_params['temperature'] = temperature if temperature is not None else 0.7 # Default temp + final_params['model'] = model if model is not None else "gpt-4o" # Default model + final_params['temperature'] = temperature if temperature is not None else 1.0 # Default temp final_params['max_tokens'] = max_tokens # Can be None final_params['top_p'] = top_p # Can be None final_system_prompt = system_prompt # Can be None diff --git a/scripts/all_zero_100_episodes.sh b/scripts/all_zero_100_episodes.sh index 2ea6b75..86fd1d4 100755 --- a/scripts/all_zero_100_episodes.sh +++ 
b/scripts/all_zero_100_episodes.sh @@ -12,7 +12,7 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ ++policy.params.interestingness_function_path="frame/interestingness/learning/baselines/all_zero.py" diff --git a/scripts/hr_applicability_100_episodes.sh b/scripts/hr_applicability_100_episodes.sh old mode 100644 new mode 100755 index d1c04f3..a9b9e2a --- a/scripts/hr_applicability_100_episodes.sh +++ b/scripts/hr_applicability_100_episodes.sh @@ -12,7 +12,7 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ ++policy.params.interestingness_function_path="frame/interestingness/learning/baselines/hr_applicability.py" diff --git a/scripts/hr_comprehensibility_100_episodes.sh b/scripts/hr_comprehensibility_100_episodes.sh old mode 100644 new mode 100755 index 0947ad3..19ffd99 --- a/scripts/hr_comprehensibility_100_episodes.sh +++ b/scripts/hr_comprehensibility_100_episodes.sh @@ -12,10 +12,9 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ - z3_usage.use_z3_example_search=true \ + z3_usage.use_z3_example_search=true \ ++policy.params.interestingness_function_path="frame/interestingness/learning/baselines/hr_comprehensibility.py" - echo "Experiment completed!" 
\ No newline at end of file diff --git a/scripts/hr_equal_weight_100_episodes.sh b/scripts/hr_equal_weight_100_episodes.sh index 7170008..05fc987 100755 --- a/scripts/hr_equal_weight_100_episodes.sh +++ b/scripts/hr_equal_weight_100_episodes.sh @@ -12,7 +12,7 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ policy.params.interestingness_function_path="frame/interestingness/learning/baselines/hr_all_weighted.py" diff --git a/scripts/hr_novelty_100_episodes.sh b/scripts/hr_novelty_100_episodes.sh old mode 100644 new mode 100755 index 5fb14e7..2dbed70 --- a/scripts/hr_novelty_100_episodes.sh +++ b/scripts/hr_novelty_100_episodes.sh @@ -12,7 +12,7 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ ++policy.params.interestingness_function_path="frame/interestingness/learning/baselines/hr_novelty.py" diff --git a/scripts/hr_parsimony_100_episodes.sh b/scripts/hr_parsimony_100_episodes.sh old mode 100644 new mode 100755 index 849b773..209b674 --- a/scripts/hr_parsimony_100_episodes.sh +++ b/scripts/hr_parsimony_100_episodes.sh @@ -12,7 +12,7 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ ++policy.params.interestingness_function_path="frame/interestingness/learning/baselines/hr_parsimony.py" diff --git a/scripts/hr_productivity_100_episodes.sh b/scripts/hr_productivity_100_episodes.sh old mode 100644 new mode 100755 index c7bce4c..1e16a54 --- 
a/scripts/hr_productivity_100_episodes.sh +++ b/scripts/hr_productivity_100_episodes.sh @@ -12,7 +12,7 @@ python -m frame.theory_builder \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ ++policy.params.interestingness_function_path="frame/interestingness/learning/baselines/hr_productivity.py" diff --git a/scripts/run_experiment_existing_interestingness.sh b/scripts/run_experiment_existing_interestingness.sh index d88c749..039b284 100755 --- a/scripts/run_experiment_existing_interestingness.sh +++ b/scripts/run_experiment_existing_interestingness.sh @@ -10,33 +10,17 @@ cd "$(dirname "$0")/.." # Enable interestingness debugging export INTERESTINGNESS_DEBUG=1 -# First, check if there are any existing interestingness functions -INTERESTINGNESS_DIR="frame/interestingness/learning/generated_programs" -mkdir -p "$INTERESTINGNESS_DIR" - -# Find the most recent interestingness function -INTERESTINGNESS_PATH=$(find "$INTERESTINGNESS_DIR" -name "interestingness_*.py" -type f -print0 2>/dev/null | xargs -0 ls -t 2>/dev/null | head -n 1) - -# If no interestingness function found, use the test function -if [ -z "$INTERESTINGNESS_PATH" ]; then - echo "No existing interestingness functions found. Using test function instead." - INTERESTINGNESS_PATH="tests/interestingness/test_programs/complex_test.py" -fi - -# Get absolute path -INTERESTINGNESS_PATH="$(pwd)/$INTERESTINGNESS_PATH" -echo "Using interestingness function: $INTERESTINGNESS_PATH" # TODO(George; 4/23): THIS IS FOR TESTING SPECIFIC PATHS! 
# Run the experiment with the existing interestingness config -python -m frame.theory_builder \ +nohup python -m frame.theory_builder \ --config-name succ_zero_existing_interestingness_experiment \ - experiment.num_episodes=64 \ + experiment.num_episodes=128 \ experiment.max_steps=1000 \ - experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.episode_timeout_seconds=30 \ + experiment.num_workers=128 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ - "+policy.params.interestingness_function_path=$INTERESTINGNESS_PATH" + ++policy.params.interestingness_function_path="frame/interestingness/learning/tests/gpt4o_test_1.py" echo "Experiment completed!" \ No newline at end of file diff --git a/scripts/run_experiment_generate_interestingness.sh b/scripts/run_experiment_generate_interestingness.sh index 717d63f..1f44874 100755 --- a/scripts/run_experiment_generate_interestingness.sh +++ b/scripts/run_experiment_generate_interestingness.sh @@ -16,12 +16,12 @@ echo "Using model config directory: $MODEL_CONFIG_DIR" echo "Using prompt template directory: $PROMPT_TEMPLATE_DIR" # Run the experiment with the generated interestingness config -python -m frame.theory_builder \ +nohup python -m frame.theory_builder \ --config-name succ_zero_generated_interestingness_experiment \ experiment.num_episodes=64 \ experiment.max_steps=1000 \ experiment.episode_timeout_seconds=120 \ - experiment.num_workers=32 \ + experiment.num_workers=64 \ z3_usage.use_z3_prover=true \ z3_usage.use_z3_example_search=true \ experiment.num_interestingness_to_generate=64 diff --git a/scripts/run_funsearch_test.sh b/scripts/run_funsearch_test.sh new file mode 100755 index 0000000..7fab6f6 --- /dev/null +++ b/scripts/run_funsearch_test.sh @@ -0,0 +1,29 @@ +#!/bin/bash +# Script to run FunSearch WITH the abstraction mechanism ENABLED, using Hydra defaults. + +echo "Running FunSearch experiment with abstraction enabled (using defaults from mid.yaml)..." 
+ +# --- Configurable Parameters --- +# Most parameters are now expected to be set in configs/funsearch/mid.yaml +# You can still override specific ones via the command line below if needed. +# OUTPUT_DIR is now managed by Hydra. +# --- End Configurable Parameters --- + +# Go to the project root directory +cd "$(dirname "$0")/.." + +# Hydra will create the output directory. + +echo "Output directory will be managed by Hydra." +echo "Running with defaults from mid.yaml and:" +echo " Abstraction: ENABLED" + +# Run the FunSearch main script using Hydra overrides +# Only enabling abstraction explicitly. +nohup python -m frame.funsearch.main \ + --config-name funsearch-test \ + abstraction.enabled=true + +echo "FunSearch experiment with abstraction completed!" +# Output directory location depends on Hydra configuration (e.g., ./outputs/YYYY-MM-DD/HH-MM-SS) +echo "Results saved in Hydra output directory." \ No newline at end of file diff --git a/scripts/run_funsearch_with_abstraction.sh b/scripts/run_funsearch_with_abstraction.sh index 6b0cebb..441e5e9 100755 --- a/scripts/run_funsearch_with_abstraction.sh +++ b/scripts/run_funsearch_with_abstraction.sh @@ -20,7 +20,7 @@ echo " Abstraction: ENABLED" # Run the FunSearch main script using Hydra overrides # Only enabling abstraction explicitly. -python -m frame.funsearch.main \ +nohup python -m frame.funsearch.main \ --config-name funsearch-mid \ abstraction.enabled=true