skydiscover-ai · mert-cemri · Mar 28, 2026 · Mar 29, 2026
diff --git a/.gitignore b/.gitignore
@@ -14,6 +14,8 @@ build/
 venv/
 env/
 
+COST_TRACKING_CHANGELOG.md
+
 # IDE
 .idea/
 .vscode/

diff --git a/benchmarks/ADRS/cloudcast/evaluator/evaluator.py b/benchmarks/ADRS/cloudcast/evaluator/evaluator.py
@@ -11,7 +11,6 @@
 from utils import *
 from simulator import *
 from broadcast import *
-from initial_program import search_algorithm
 import networkx as nx
 
 

diff --git a/benchmarks/ADRS/cloudcast/evaluator/simulator.py b/benchmarks/ADRS/cloudcast/evaluator/simulator.py
@@ -165,8 +165,11 @@ def __transfer_time(self, log=True):
         for dst in self.dsts:
             partition_time = float("-inf")
             for i in range(self.num_partitions):
-                for edge in self.paths[dst][str(i)]:
-                    edge_data = self.g[edge[0]][edge[1]]
+                path_edges = self.paths[dst][str(i)]
+                # Transfer time = partition_data_vol / bottleneck flow along the path
+                bottleneck = min(self.g[e[0]][e[1]]['flow'] for e in path_edges)
+                t = self.partition_data_vol / bottleneck if bottleneck > 0 else float('inf')
+                partition_time = max(partition_time, t)
             t_dict[dst] = partition_time
 
         max_t = max(t_dict.values())

diff --git a/skydiscover/api.py b/skydiscover/api.py
@@ -48,6 +48,7 @@ class DiscoveryResult:
     metrics: Dict[str, Any]
     output_dir: Optional[str]
     initial_score: Optional[float] = None
+    llm_cost_summary: Optional[Dict[str, Any]] = None
 
     def __repr__(self) -> str:
         init = f"{self.initial_score:.4f}" if self.initial_score is not None else "N/A"
@@ -234,6 +235,7 @@ async def _run_discovery_async(
             best_score = get_score(metrics)
 
         initial_score = controller.initial_score
+        llm_cost_summary = controller.get_llm_cost_summary()
 
         # Return the result
         return DiscoveryResult(
@@ -243,6 +245,7 @@ async def _run_discovery_async(
             metrics=metrics,
             output_dir=actual_output_dir if not cleanup else None,
             initial_score=initial_score,
+            llm_cost_summary=llm_cost_summary,
         )
 
     finally:

diff --git a/skydiscover/cli.py b/skydiscover/cli.py
@@ -228,18 +228,38 @@ async def main_async() -> int:
             iterations=args.iterations,
             checkpoint_path=args.checkpoint,
         )
+        llm_cost_summary = runner.get_llm_cost_summary()
 
         checkpoint_dir = os.path.join(runner.output_dir, "checkpoints")
         latest_checkpoint = _find_latest_checkpoint(checkpoint_dir)
 
-        print("\nDiscovery complete!")
+        print("\n" + "=" * 50)
+        print("  Discovery Run Summary")
+        print("=" * 50)
         if best_program is None:
-            print("No valid programs were found.")
+            print("  Best score: N/A (no valid programs found)")
         else:
-            print("Best program metrics:")
+            print("  Best program metrics:")
             for name, value in best_program.metrics.items():
                 formatted = f"{value:.4f}" if isinstance(value, (int, float)) else str(value)
-                print(f"  {name}: {formatted}")
+                print(f"    {name}: {formatted}")
+        print("-" * 50)
+        totals = llm_cost_summary["total"]
+        print("  LLM Cost Breakdown:")
+        print(f"    Calls:         {totals['call_count']}")
+        print(f"    Input tokens:  {totals['input_tokens']:,}")
+        print(f"    Output tokens: {totals['output_tokens']:,}")
+        print(f"    Input cost:    ${totals['input_cost_usd']:.6f}")
+        print(f"    Output cost:   ${totals['output_cost_usd']:.6f}")
+        print(f"    Total cost:    ${totals['total_cost_usd']:.6f}")
+        by_cat = llm_cost_summary.get("by_category", {})
+        if by_cat:
+            print("  By category:")
+            for cat, cat_data in by_cat.items():
+                print(
+                    f"    {cat}: {cat_data['call_count']} calls, ${cat_data['total_cost_usd']:.6f}"
+                )
+        print("=" * 50)
 
         if latest_checkpoint:
             print(f"\nLatest checkpoint: {latest_checkpoint}")

diff --git a/skydiscover/config.py b/skydiscover/config.py
@@ -143,6 +143,10 @@ class LLMModelConfig:
     retries: Optional[int] = None
     retry_delay: Optional[int] = None
 
+    # Pricing parameters (USD per 1M tokens)
+    input_price_per_million_tokens: Optional[float] = None
+    output_price_per_million_tokens: Optional[float] = None
+
     # Reasoning parameters
     reasoning_effort: Optional[str] = None
 
@@ -165,6 +169,10 @@ class LLMConfig(LLMModelConfig):
     retries: int = 3
     retry_delay: int = 5
 
+    # Pricing parameters (USD per 1M tokens)
+    input_price_per_million_tokens: Optional[float] = None
+    output_price_per_million_tokens: Optional[float] = None
+
     # model(s) for solution discovery
     models: List[LLMModelConfig] = field(default_factory=list)
 
@@ -222,6 +230,8 @@ def __post_init__(self):
             "timeout": self.timeout,
             "retries": self.retries,
             "retry_delay": self.retry_delay,
+            "input_price_per_million_tokens": self.input_price_per_million_tokens,
+            "output_price_per_million_tokens": self.output_price_per_million_tokens,
             "reasoning_effort": self.reasoning_effort,
         }
         self.update_model_params(shared_config)
@@ -696,6 +706,8 @@ def to_dict(self) -> Dict[str, Any]:
                 "timeout": self.llm.timeout,
                 "retries": self.llm.retries,
                 "retry_delay": self.llm.retry_delay,
+                "input_price_per_million_tokens": self.llm.input_price_per_million_tokens,
+                "output_price_per_million_tokens": self.llm.output_price_per_million_tokens,
             },
             "prompt": {
                 "template": self.context_builder.template,
@@ -905,6 +917,8 @@ def apply_overrides(
                 "timeout": config.llm.timeout,
                 "retries": config.llm.retries,
                 "retry_delay": config.llm.retry_delay,
+                "input_price_per_million_tokens": config.llm.input_price_per_million_tokens,
+                "output_price_per_million_tokens": config.llm.output_price_per_million_tokens,
                 "reasoning_effort": config.llm.reasoning_effort,
             },
             overwrite=True,

diff --git a/skydiscover/llm/agentic_generator.py b/skydiscover/llm/agentic_generator.py
@@ -11,6 +11,8 @@
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Tuple
 
+from skydiscover.llm.base import LLMResponse
+from skydiscover.llm.cost import compute_token_costs, extract_usage_counts
 from skydiscover.llm.openai import is_openai_reasoning_model
 from skydiscover.utils.code_utils import build_repo_map
 
@@ -44,12 +46,20 @@ def __init__(self, llm_pool, config):
         self.llm_pool = llm_pool
         self.config = config
 
-    async def generate(self, system_message: str, user_message: str) -> Optional[str]:
-        """Run the agent loop. Returns generated text, or None on failure."""
+    async def generate(self, system_message: str, user_message: str) -> Optional[LLMResponse]:
+        """Run the agent loop. Returns the text with usage summed over steps, or None on failure."""
         cfg = self.config
         files_read: set = set()
         conversation: List[Dict[str, Any]] = []
         t0 = time.time()
+        total_input_tokens = 0
+        total_output_tokens = 0
+        total_tokens = 0
+        total_input_cost_usd = 0.0
+        total_output_cost_usd = 0.0
+        total_cost_usd = 0.0
+        last_model_name: Optional[str] = None
+        last_usage_source: Optional[str] = None
 
         sys_prompt = f"{system_message}\n\n{_AGENTIC_SYSTEM_PROMPT}"
         repo_map = build_repo_map(
@@ -78,7 +88,7 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
                 )
 
             try:
-                assistant_msg = await asyncio.wait_for(
+                assistant_msg, llm_response = await asyncio.wait_for(
                     self._call_llm(sys_prompt, conversation),
                     timeout=cfg.per_step_timeout,
                 )
@@ -95,6 +105,15 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
                 logger.error("Step %d: LLM error: %s", step, e)
                 break
 
+            total_input_tokens += llm_response.input_tokens
+            total_output_tokens += llm_response.output_tokens
+            total_tokens += llm_response.total_tokens
+            total_input_cost_usd += llm_response.input_cost_usd
+            total_output_cost_usd += llm_response.output_cost_usd
+            total_cost_usd += llm_response.total_cost_usd
+            last_model_name = llm_response.model_name
+            last_usage_source = llm_response.usage_source
+
             tool_calls = assistant_msg.get("tool_calls", [])
             text_content = assistant_msg.get("content", "").strip()
             conversation.append(assistant_msg)
@@ -104,7 +123,17 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
                     logger.info(
                         "Agent produced text at step %d (%d files read)", step, len(files_read)
                     )
-                    return text_content
+                    return LLMResponse(
+                        text=text_content,
+                        model_name=last_model_name,
+                        usage_source=last_usage_source,
+                        input_tokens=total_input_tokens,
+                        output_tokens=total_output_tokens,
+                        total_tokens=total_tokens,
+                        input_cost_usd=total_input_cost_usd,
+                        output_cost_usd=total_output_cost_usd,
+                        total_cost_usd=total_cost_usd,
+                    )
                 conversation.append(
                     {
                         "role": "user",
@@ -145,7 +174,7 @@ async def generate(self, system_message: str, user_message: str) -> Optional[str
 
     async def _call_llm(
         self, system_message: str, conversation: List[Dict[str, Any]]
-    ) -> Dict[str, Any]:
+    ) -> tuple[Dict[str, Any], LLMResponse]:
         """Call a sampled LLM with tool schemas."""
         model = self.llm_pool.models[
             self.llm_pool.random_state.choices(
@@ -184,6 +213,27 @@ async def _call_llm(
         resp = await loop.run_in_executor(
             None, lambda: model.client.chat.completions.create(**params)
         )
+        input_tokens, output_tokens, total_tokens = extract_usage_counts(
+            getattr(resp, "usage", None)
+        )
+        input_cost_usd, output_cost_usd, total_cost_usd = compute_token_costs(
+            input_tokens,
+            output_tokens,
+            getattr(model, "input_price_per_million_tokens", None),
+            getattr(model, "output_price_per_million_tokens", None),
+        )
+        llm_response = LLMResponse(
+            text=resp.choices[0].message.content or "",
+            model_name=getattr(resp, "model", None) or getattr(model, "model", None),
+            usage_source="api" if getattr(resp, "usage", None) is not None else None,
+            input_tokens=input_tokens,
+            output_tokens=output_tokens,
+            total_tokens=total_tokens,
+            input_cost_usd=input_cost_usd,
+            output_cost_usd=output_cost_usd,
+            total_cost_usd=total_cost_usd,
+        )
+        self.llm_pool.record_response_usage(llm_response)
 
         msg = resp.choices[0].message
         out: Dict[str, Any] = {"role": "assistant", "content": msg.content or ""}
@@ -196,7 +246,7 @@ async def _call_llm(
                 }
                 for tc in msg.tool_calls
             ]
-        return out
+        return out, llm_response
 
     # ------------------------------------------------------------------
     # Tools

diff --git a/skydiscover/llm/base.py b/skydiscover/llm/base.py
@@ -11,10 +11,20 @@ class LLMResponse:
 
     text: generated text content.
     image_path: path to generated image file, or None for text-only.
+    model_name: resolved provider model name.
+    usage_source: how token usage was obtained ("api", "estimated", etc.).
     """
 
     text: str = ""
     image_path: Optional[str] = None
+    model_name: Optional[str] = None
+    usage_source: Optional[str] = None
+    input_tokens: int = 0
+    output_tokens: int = 0
+    total_tokens: int = 0
+    input_cost_usd: float = 0.0
+    output_cost_usd: float = 0.0
+    total_cost_usd: float = 0.0
 
 
 class LLMInterface(ABC):
-Original file line number
+Diff line change
@@ Expand Up / @@ -14,6 +14,8 @@ build/ @@
     venv/
     env/
+    COST_TRACKING_CHANGELOG.md
     # IDE
     .idea/
     .vscode/
@@ Expand Down @@