143 changes: 72 additions & 71 deletions agent_system.py

Large diffs are not rendered by default.

73 changes: 73 additions & 0 deletions clients.py
@@ -0,0 +1,73 @@
import os
from abc import ABC, abstractmethod

from google import genai
from google.genai import types
from openai import OpenAI


class LLMClient(ABC):

    @property
    @abstractmethod
    def provider(self) -> str:
        """Short provider name, e.g. "gemini" or "openai"."""
        ...

    @abstractmethod
    def call_llm(self, prompt: str, system_instruction: str = "") -> str:
        """Send a prompt (with an optional system instruction) and return the response text."""
        ...

class GeminiClient(LLMClient):
def __init__(self, model_name: str):
self.model_name = model_name
try:
self.client = genai.Client(
api_key=os.environ.get("GEMINI_API_KEY"))
print("Gemini API client initialized successfully")
except Exception as e:
print(f"Error initializing Gemini API client: {e}")
print("Make sure to set the GEMINI_API_KEY environment variable")
raise

@property
def provider(self):
return "gemini"

def call_llm(self, prompt: str, system_instruction: str = "") -> str:
"""Call the Gemini LLM with a prompt and return the response"""
try:
            # Fall back to an empty system instruction if none was provided
            sys_instruction = system_instruction or ""

response = self.client.models.generate_content(
model=self.model_name,
config=types.GenerateContentConfig(
system_instruction=sys_instruction),
contents=prompt)
return response.text
except Exception as e:
print(f"Error calling Gemini API: {e}")
return f"Error: {str(e)}"



class OpenAIClient(LLMClient):
def __init__(self, model_name: str):
self.model_name = model_name
api_key = os.environ.get("OPENAI_API_KEY")
org_id = os.environ.get("OPENAI_ORG_ID")
self.client = OpenAI(api_key=api_key, organization=org_id) if org_id else OpenAI(api_key=api_key)

@property
def provider(self):
return "openai"

def call_llm(self, prompt: str, system_instruction: str = "") -> str:
try:
response = self.client.chat.completions.create(
model=self.model_name,
messages=[{"role": "system", "content": system_instruction}, {"role": "user", "content": prompt}]
)
return response.choices[0].message.content
except Exception as e:
print(f"Error calling OpenAI API: {e}")
return f"Error: {str(e)}"
12 changes: 6 additions & 6 deletions prompts/script_generation/strategies.py
@@ -11,7 +11,7 @@ def get_explore_instructions(example_problems, historical_context, last_scripts_
last_scripts_context: Context about the last 5 scripts tried
learning_context: Accumulated learnings from previous iterations
capability_context: Capability assessment and improvement guidance
gemini_api_example: Standard API usage example
llm_api_example: Standard API usage example

Returns:
str: Complete exploration prompt
@@ -71,7 +71,7 @@ def get_explore_instructions(example_problems, historical_context, last_scripts_
- If it is unclear how well a processing stage or part of the pipeline is performing, include verification steps at different points in the pipeline to help deduce which parts succeed and where the system breaks
- Answer checkers to validate the final answer against the problem statement. If the answer is incorrect, the checker can send the answer back to an earlier part of the system for refinement with feedback

Here's how to call the Gemini API. Use this example without modification and don't invent configuration options:
Here's how to call the LLM API. Use this example without modification and don't invent configuration options:
{llm_api_example}

Since this is an EXPLORATION phase:
@@ -122,7 +122,7 @@ def get_exploit_instructions(example_problems, historical_context, top_scripts_a
top_scripts_analysis: Analysis of top performing scripts to combine
learning_context: Accumulated learnings from previous iterations
capability_context: Capability assessment and improvement guidance
gemini_api_example: Standard API usage example
llm_api_example: Standard API usage example

Returns:
str: Complete exploitation prompt
@@ -185,7 +185,7 @@ def get_exploit_instructions(example_problems, historical_context, top_scripts_a
4. The hybrid should be more robust than any individual approach
5. Address the weaknesses identified in the capability assessment through synthesis

Here's how to call the Gemini API. Use this example without modification:
Here's how to call the LLM API. Use this example without modification:
{llm_api_example}

SYNTHESIS IMPLEMENTATION:
@@ -223,7 +223,7 @@ def get_refine_instructions(example_problems, historical_context, best_script_to
error_samples: List of failed examples from the best script
learning_context: Accumulated learnings from previous iterations
capability_context: Capability assessment and improvement guidance
gemini_api_example: Standard API usage example
llm_api_example: Standard API usage example

Returns:
str: Complete refinement prompt
@@ -296,7 +296,7 @@ def get_refine_instructions(example_problems, historical_context, best_script_to
5. EVERY LLM PROMPT must include embedded examples
6. Test your hypothesis with additional verification

Here's how to call the Gemini API. Use this example without modification:
Here's how to call the LLM API. Use this example without modification:
{llm_api_example}

REFINEMENT IMPLEMENTATION:
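For reference, the {llm_api_example} placeholder is presumably filled with the provider-agnostic wrapper defined in test_script_template.py below; a sketch of what generated scripts would see (an assumption, not verified against agent_system.py):

    def call_llm(prompt, system_instruction=""):
        # `client` is constructed once per generated script from the configured provider/model
        return client.call_llm(prompt, system_instruction)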
40 changes: 40 additions & 0 deletions providers.py
@@ -0,0 +1,40 @@
from abc import ABC, abstractmethod

from clients import GeminiClient, LLMClient, OpenAIClient


class LLMProvider(ABC):

    @abstractmethod
    def build_client(self, model_name: str) -> LLMClient:
        """Construct a concrete LLMClient for the given model."""
        ...



class GeminiProvider(LLMProvider):
def build_client(self, model_name: str) -> GeminiClient:
return GeminiClient(model_name)

class OpenAIProvider(LLMProvider):
def build_client(self, model_name: str) -> OpenAIClient:
return OpenAIClient(model_name)



class ProviderFactory:
@staticmethod
def get_provider(provider_type: str) -> LLMProvider:
if provider_type == "gemini":
return GeminiProvider()
elif provider_type == "openai":
return OpenAIProvider()
elif provider_type == "huggingface":
# TODO(jam): add in separate PR
raise NotImplementedError("HF not yet supported")
else:
raise ValueError(f"Invalid provider type: {provider_type}")

@staticmethod
def get_client(provider_type: str, model_name: str) -> LLMClient:
provider = ProviderFactory.get_provider(provider_type)
return provider.build_client(model_name)
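A minimal sketch of how the factory is meant to be consumed; the unsupported provider string is illustrative:

    from providers import ProviderFactory

    # Resolve the provider by name, then build a client for a concrete model
    client = ProviderFactory.get_client("gemini", "gemini-2.0-flash")
    assert client.provider == "gemini"

    # Unrecognized providers fail fast
    try:
        ProviderFactory.get_client("anthropic", "claude-3")  # hypothetical provider, not wired up in this PR
    except ValueError as err:
        print(err)  # Invalid provider type: anthropic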
7 changes: 7 additions & 0 deletions pyproject.toml
@@ -4,6 +4,7 @@ version = "0.1.0"
description = ""
authors = ["Nick Ryan <[email protected]>"]
requires-python = ">=3.11"

dependencies = [
"openai>=1.72.0",
"sift-stack-py>=0.5.0",
@@ -12,4 +13,10 @@ dependencies = [
"matplotlib>=3.10.0",
"numpy>=2.2.0",
"requests>=2.32.0",
"ruff"
]


[tool.ruff]
line-length = 120
lint.select = ["E", "F", "I"]  # pycodestyle errors (E), Pyflakes (F), and import sorting (I)
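Assuming ruff's standard behavior, this [tool.ruff] table is picked up automatically when running `ruff check .` from the repository root.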
29 changes: 20 additions & 9 deletions run_script.py
@@ -16,6 +16,7 @@

from agent_system import AgentSystem
from dataset_loader import create_dataset_loader
from providers import ProviderFactory

# Fixed random seed for reproducibility (if shuffling is enabled)
RANDOM_SEED = 42
@@ -31,11 +32,15 @@ def run_agent(iterations: int, loader_config: Dict) -> None:
# Create the appropriate dataset loader
try:
loader_type = loader_config.pop("loader_type")
provider = loader_config.pop("provider")
model = loader_config.pop("model")
print(f"Loading {provider} {model}")
dataset_loader = create_dataset_loader(loader_type, **loader_config)
client = ProviderFactory.get_client(provider, model)
print(f"Created {loader_type} dataset loader with {dataset_loader.get_total_count()} examples")

# Initialize the agent system with the dataset loader
agent = AgentSystem(dataset_loader=dataset_loader)
agent = AgentSystem(client=client, dataset_loader=dataset_loader)
except Exception as e:
print(f"Error initializing system: {e}")
sys.exit(1)
@@ -171,6 +176,17 @@ def parse_arguments():
parser = argparse.ArgumentParser(
description="Run the Agentic Learning System with custom dataset loaders")

parser.add_argument("--provider",
type=str,
default="gemini",
choices=["gemini", "openai"],
help="Provider to use (default: gemini)")

parser.add_argument("--model",
type=str,
default="gemini-2.0-flash",
help="Model to use (default: gemini-2.0-flash)")

parser.add_argument("--iterations",
"-i",
type=int,
@@ -248,17 +264,12 @@ def parse_arguments():
# Parse command-line arguments
args = parse_arguments()

# Check environment variables
if not os.environ.get("GEMINI_API_KEY"):
print("Error: GEMINI_API_KEY environment variable is not set.")
print(
"Please set this variable to your Gemini API key before running the script."
)
print("Example: export GEMINI_API_KEY=your_api_key_here")
sys.exit(1)


# Create loader configuration
loader_config = {
"provider": args.provider,
"model": args.model,
"loader_type": args.loader,
"dataset_path": args.dataset,
"shuffle": not args.no_shuffle,
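With the new flags, a run against OpenAI might be launched like this (model name and iteration count are illustrative; OPENAI_API_KEY must be set now that the hard-coded GEMINI_API_KEY check is gone):

    python run_script.py --provider openai --model gpt-4o-mini --iterations 3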
48 changes: 20 additions & 28 deletions test_script_template.py
@@ -7,41 +7,33 @@
import functools
import importlib.util


# Add the scripts directory to the path
sys.path.append("{scripts_dir}")

# Configure tracing
trace_file = "{trace_file}"
os.makedirs(os.path.dirname(trace_file), exist_ok=True)

def call_llm(prompt, system_instruction=None):
try:
from google import genai
from google.genai import types
import os # Import the os module

# Initialize the Gemini client
client = genai.Client(api_key=os.environ.get("GEMINI_API_KEY"))

# Call the API with system instruction if provided
if system_instruction:
response = client.models.generate_content(
model="gemini-2.0-flash",
config=types.GenerateContentConfig(
system_instruction=system_instruction
),
contents=prompt
)
else:
response = client.models.generate_content(
model="gemini-2.0-flash",
contents=prompt
)

return response.text
except Exception as e:
print("Error calling Gemini API: " + str(e))
return "Error: " + str(e)
import sys
import os
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))

from clients import GeminiClient, OpenAIClient

provider_type = "{provider_type}"
model_name = "{model_name}"

if provider_type == "gemini":
client = GeminiClient(model_name)
elif provider_type == "openai":
client = OpenAIClient(model_name)
else:
raise ValueError(f"Unknown provider: {provider_type}")

def call_llm(prompt, system_instruction=""):
return client.call_llm(prompt, system_instruction)
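# Illustrative only: generated scripts call this wrapper identically regardless
# of provider, e.g.
#   answer = call_llm("Solve 12 * 7", system_instruction="Reply with only the number.")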


def execute_code(code_str, timeout=10):
"""Execute Python code with automatic package installation and proper scoping"""