Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ build/
venv/
env/

COST_TRACKING_CHANGELOG.md

# IDE
.idea/
.vscode/
Expand Down
1 change: 0 additions & 1 deletion benchmarks/ADRS/cloudcast/evaluator/evaluator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from utils import *
from simulator import *
from broadcast import *
from initial_program import search_algorithm
import networkx as nx


Expand Down
7 changes: 5 additions & 2 deletions benchmarks/ADRS/cloudcast/evaluator/simulator.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,8 +165,11 @@ def __transfer_time(self, log=True):
for dst in self.dsts:
partition_time = float("-inf")
for i in range(self.num_partitions):
for edge in self.paths[dst][str(i)]:
edge_data = self.g[edge[0]][edge[1]]
path_edges = self.paths[dst][str(i)]
# Transfer time = partition_data_vol / bottleneck flow along the path
bottleneck = min(self.g[e[0]][e[1]]['flow'] for e in path_edges)
t = self.partition_data_vol / bottleneck if bottleneck > 0 else float('inf')
partition_time = max(partition_time, t)
t_dict[dst] = partition_time

max_t = max(t_dict.values())
Expand Down
3 changes: 3 additions & 0 deletions skydiscover/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ class DiscoveryResult:
metrics: Dict[str, Any]
output_dir: Optional[str]
initial_score: Optional[float] = None
llm_cost_summary: Optional[Dict[str, Any]] = None

def __repr__(self) -> str:
init = f"{self.initial_score:.4f}" if self.initial_score is not None else "N/A"
Expand Down Expand Up @@ -234,6 +235,7 @@ async def _run_discovery_async(
best_score = get_score(metrics)

initial_score = controller.initial_score
llm_cost_summary = controller.get_llm_cost_summary()

# Return the result
return DiscoveryResult(
Expand All @@ -243,6 +245,7 @@ async def _run_discovery_async(
metrics=metrics,
output_dir=actual_output_dir if not cleanup else None,
initial_score=initial_score,
llm_cost_summary=llm_cost_summary,
)

finally:
Expand Down
28 changes: 24 additions & 4 deletions skydiscover/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,18 +228,38 @@ async def main_async() -> int:
iterations=args.iterations,
checkpoint_path=args.checkpoint,
)
llm_cost_summary = runner.get_llm_cost_summary()

checkpoint_dir = os.path.join(runner.output_dir, "checkpoints")
latest_checkpoint = _find_latest_checkpoint(checkpoint_dir)

print("\nDiscovery complete!")
print("\n" + "=" * 50)
print(" Discovery Run Summary")
print("=" * 50)
if best_program is None:
print("No valid programs were found.")
print(" Best score: N/A (no valid programs found)")
else:
print("Best program metrics:")
print(" Best program metrics:")
for name, value in best_program.metrics.items():
formatted = f"{value:.4f}" if isinstance(value, (int, float)) else str(value)
print(f" {name}: {formatted}")
print(f" {name}: {formatted}")
print("-" * 50)
totals = llm_cost_summary["total"]
print(" LLM Cost Breakdown:")
print(f" Calls: {totals['call_count']}")
print(f" Input tokens: {totals['input_tokens']:,}")
print(f" Output tokens: {totals['output_tokens']:,}")
print(f" Input cost: ${totals['input_cost_usd']:.6f}")
print(f" Output cost: ${totals['output_cost_usd']:.6f}")
print(f" Total cost: ${totals['total_cost_usd']:.6f}")
by_cat = llm_cost_summary.get("by_category", {})
if by_cat:
print(" By category:")
for cat, cat_data in by_cat.items():
print(
f" {cat}: {cat_data['call_count']} calls, ${cat_data['total_cost_usd']:.6f}"
)
print("=" * 50)

if latest_checkpoint:
print(f"\nLatest checkpoint: {latest_checkpoint}")
Expand Down
14 changes: 14 additions & 0 deletions skydiscover/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,10 @@ class LLMModelConfig:
retries: Optional[int] = None
retry_delay: Optional[int] = None

# Pricing parameters (USD per 1M tokens)
input_price_per_million_tokens: Optional[float] = None
output_price_per_million_tokens: Optional[float] = None

# Reasoning parameters
reasoning_effort: Optional[str] = None

Expand All @@ -165,6 +169,10 @@ class LLMConfig(LLMModelConfig):
retries: int = 3
retry_delay: int = 5

# Pricing parameters (USD per 1M tokens)
input_price_per_million_tokens: Optional[float] = None
output_price_per_million_tokens: Optional[float] = None

# model(s) for solution discovery
models: List[LLMModelConfig] = field(default_factory=list)

Expand Down Expand Up @@ -222,6 +230,8 @@ def __post_init__(self):
"timeout": self.timeout,
"retries": self.retries,
"retry_delay": self.retry_delay,
"input_price_per_million_tokens": self.input_price_per_million_tokens,
"output_price_per_million_tokens": self.output_price_per_million_tokens,
"reasoning_effort": self.reasoning_effort,
}
self.update_model_params(shared_config)
Expand Down Expand Up @@ -696,6 +706,8 @@ def to_dict(self) -> Dict[str, Any]:
"timeout": self.llm.timeout,
"retries": self.llm.retries,
"retry_delay": self.llm.retry_delay,
"input_price_per_million_tokens": self.llm.input_price_per_million_tokens,
"output_price_per_million_tokens": self.llm.output_price_per_million_tokens,
},
"prompt": {
"template": self.context_builder.template,
Expand Down Expand Up @@ -905,6 +917,8 @@ def apply_overrides(
"timeout": config.llm.timeout,
"retries": config.llm.retries,
"retry_delay": config.llm.retry_delay,
"input_price_per_million_tokens": config.llm.input_price_per_million_tokens,
"output_price_per_million_tokens": config.llm.output_price_per_million_tokens,
"reasoning_effort": config.llm.reasoning_effort,
},
overwrite=True,
Expand Down
62 changes: 56 additions & 6 deletions skydiscover/llm/agentic_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from skydiscover.llm.base import LLMResponse
from skydiscover.llm.cost import compute_token_costs, extract_usage_counts
from skydiscover.llm.openai import is_openai_reasoning_model
from skydiscover.utils.code_utils import build_repo_map

Expand Down Expand Up @@ -44,12 +46,20 @@ def __init__(self, llm_pool, config):
self.llm_pool = llm_pool
self.config = config

async def generate(self, system_message: str, user_message: str) -> Optional[str]:
"""Run the agent loop. Returns generated text, or None on failure."""
async def generate(self, system_message: str, user_message: str) -> Optional[LLMResponse]:
"""Run the agent loop. Returns aggregated usage for the final text, or None on failure."""
cfg = self.config
files_read: set = set()
conversation: List[Dict[str, Any]] = []
t0 = time.time()
total_input_tokens = 0
total_output_tokens = 0
total_tokens = 0
total_input_cost_usd = 0.0
total_output_cost_usd = 0.0
total_cost_usd = 0.0
last_model_name: Optional[str] = None
last_usage_source: Optional[str] = None

sys_prompt = f"{system_message}\n\n{_AGENTIC_SYSTEM_PROMPT}"
repo_map = build_repo_map(
Expand Down Expand Up @@ -78,7 +88,7 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
)

try:
assistant_msg = await asyncio.wait_for(
assistant_msg, llm_response = await asyncio.wait_for(
self._call_llm(sys_prompt, conversation),
timeout=cfg.per_step_timeout,
)
Expand All @@ -95,6 +105,15 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
logger.error("Step %d: LLM error: %s", step, e)
break

total_input_tokens += llm_response.input_tokens
total_output_tokens += llm_response.output_tokens
total_tokens += llm_response.total_tokens
total_input_cost_usd += llm_response.input_cost_usd
total_output_cost_usd += llm_response.output_cost_usd
total_cost_usd += llm_response.total_cost_usd
last_model_name = llm_response.model_name
last_usage_source = llm_response.usage_source

tool_calls = assistant_msg.get("tool_calls", [])
text_content = assistant_msg.get("content", "").strip()
conversation.append(assistant_msg)
Expand All @@ -104,7 +123,17 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
logger.info(
"Agent produced text at step %d (%d files read)", step, len(files_read)
)
return text_content
return LLMResponse(
text=text_content,
model_name=last_model_name,
usage_source=last_usage_source,
input_tokens=total_input_tokens,
output_tokens=total_output_tokens,
total_tokens=total_tokens,
input_cost_usd=total_input_cost_usd,
output_cost_usd=total_output_cost_usd,
total_cost_usd=total_cost_usd,
)
conversation.append(
{
"role": "user",
Expand Down Expand Up @@ -145,7 +174,7 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str

async def _call_llm(
self, system_message: str, conversation: List[Dict[str, Any]]
) -> Dict[str, Any]:
) -> tuple[Dict[str, Any], LLMResponse]:
"""Call a sampled LLM with tool schemas."""
model = self.llm_pool.models[
self.llm_pool.random_state.choices(
Expand Down Expand Up @@ -184,6 +213,27 @@ async def _call_llm(
resp = await loop.run_in_executor(
None, lambda: model.client.chat.completions.create(**params)
)
input_tokens, output_tokens, total_tokens = extract_usage_counts(
getattr(resp, "usage", None)
)
input_cost_usd, output_cost_usd, total_cost_usd = compute_token_costs(
input_tokens,
output_tokens,
getattr(model, "input_price_per_million_tokens", None),
getattr(model, "output_price_per_million_tokens", None),
)
llm_response = LLMResponse(
text=resp.choices[0].message.content or "",
model_name=getattr(resp, "model", None) or getattr(model, "model", None),
usage_source="api" if getattr(resp, "usage", None) is not None else None,
input_tokens=input_tokens,
output_tokens=output_tokens,
total_tokens=total_tokens,
input_cost_usd=input_cost_usd,
output_cost_usd=output_cost_usd,
total_cost_usd=total_cost_usd,
)
self.llm_pool.record_response_usage(llm_response)

msg = resp.choices[0].message
out: Dict[str, Any] = {"role": "assistant", "content": msg.content or ""}
Expand All @@ -196,7 +246,7 @@ async def _call_llm(
}
for tc in msg.tool_calls
]
return out
return out, llm_response

# ------------------------------------------------------------------
# Tools
Expand Down
10 changes: 10 additions & 0 deletions skydiscover/llm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,20 @@ class LLMResponse:

text: generated text content.
image_path: path to generated image file, or None for text-only.
model_name: resolved provider model name.
usage_source: how token usage was obtained ("api", "estimated", etc.).
"""

text: str = ""
image_path: Optional[str] = None
model_name: Optional[str] = None
usage_source: Optional[str] = None
input_tokens: int = 0
output_tokens: int = 0
total_tokens: int = 0
input_cost_usd: float = 0.0
output_cost_usd: float = 0.0
total_cost_usd: float = 0.0


class LLMInterface(ABC):
Expand Down
Loading
Loading