From a2b2e1ca9aa5c9487ec302b4c3c71e828e319a0d Mon Sep 17 00:00:00 2001 From: Edmund Miller Date: Fri, 24 Apr 2026 13:50:10 -0500 Subject: [PATCH 1/3] fix(benchmark-report): drop task cost fallback Stop treating Seqera task cost as a fallback when no CUR data is present. Normalized task rows now emit cost as null and aggregation reports zero without CUR input. Tests: nf-test test --profile=+docker --verbose --- .../bin/benchmark_report_aggregate.py | 22 ++++++++----------- .../tests/test_aggregate.py | 4 ++-- .../bin/benchmark_report_normalize.py | 2 +- .../tests/test_normalize.py | 2 +- 4 files changed, 13 insertions(+), 17 deletions(-) diff --git a/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py b/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py index 26b8427..af62477 100644 --- a/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py +++ b/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py @@ -62,13 +62,13 @@ def _lookup_cost( return None -def _cost_or_task(cost_row: dict[str, Any] | None, key: str, task_cost: float, default: float = 0.0) -> float: +def _cost_or_task(cost_row: dict[str, Any] | None, key: str, default: float = 0.0) -> float: if not cost_row: - return task_cost if key in {"cost", "used_cost"} else default + return default value = cost_row.get(key) if value is None: - return task_cost if key in {"cost", "used_cost"} else default + return default return float(value) @@ -311,21 +311,17 @@ def build_report_data(jsonl_dir: Path, include_failed_runs: bool = False) -> dic } cost_row = _lookup_cost(costs_index, run_id=run_id, process=process, process_short=process_short, hash_short=hash_short) - task_cost = float(t.get("cost") or 0.0) if cost_row: - run_cost_acc[run_group_key]["cost"] += _cost_or_task(cost_row, "cost", task_cost) - run_cost_acc[run_group_key]["used_cost"] += _cost_or_task(cost_row, "used_cost", task_cost) - run_cost_acc[run_group_key]["unused_cost"] += _cost_or_task(cost_row, "unused_cost", task_cost, default=0.0) - else: - run_cost_acc[run_group_key]["cost"] += task_cost - run_cost_acc[run_group_key]["used_cost"] += task_cost + run_cost_acc[run_group_key]["cost"] += _cost_or_task(cost_row, "cost") + run_cost_acc[run_group_key]["used_cost"] += _cost_or_task(cost_row, "used_cost") + run_cost_acc[run_group_key]["unused_cost"] += _cost_or_task(cost_row, "unused_cost") if has_cost_rows: overview_key = (group, process_short) - cost_group_acc[overview_key]["total_cost"] += _cost_or_task(cost_row, "cost", task_cost) - cost_group_acc[overview_key]["used_cost"] += _cost_or_task(cost_row, "used_cost", task_cost) - cost_group_acc[overview_key]["unused_cost"] += _cost_or_task(cost_row, "unused_cost", task_cost, default=0.0) + cost_group_acc[overview_key]["total_cost"] += _cost_or_task(cost_row, "cost") + cost_group_acc[overview_key]["used_cost"] += _cost_or_task(cost_row, "used_cost") + cost_group_acc[overview_key]["unused_cost"] += _cost_or_task(cost_row, "unused_cost") cost_group_acc[overview_key]["n_tasks"] += 1 status = t.get("status") diff --git a/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py b/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py index dfe7bbd..b7d0aed 100644 --- a/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py +++ b/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py @@ -31,14 +31,14 @@ def test_build_report_data_has_all_sections(tmp_path, make_run, flat_task, write } -def test_run_costs_without_cur_uses_task_cost(tmp_path, make_run, flat_task, write_run_json): +def test_run_costs_without_cur_are_zero(tmp_path, make_run, flat_task, write_run_json): data_dir = tmp_path / "data" jsonl_dir = tmp_path / "jsonl_bundle" write_run_json(data_dir, [make_run(tasks=[flat_task(cost=4.2)])]) normalize_jsonl(data_dir, jsonl_dir) data = build_report_data(jsonl_dir) - assert data["run_costs"][0]["cost"] == 4.2 + assert data["run_costs"][0]["cost"] == 0.0 assert data["run_costs"][0]["used_cost"] is None diff --git a/modules/local/normalize_benchmark_jsonl/bin/benchmark_report_normalize.py b/modules/local/normalize_benchmark_jsonl/bin/benchmark_report_normalize.py index 02e2066..e8a3849 100644 --- a/modules/local/normalize_benchmark_jsonl/bin/benchmark_report_normalize.py +++ b/modules/local/normalize_benchmark_jsonl/bin/benchmark_report_normalize.py @@ -179,7 +179,7 @@ def extract_tasks(runs: list[dict[str, Any]]) -> list[dict[str, Any]]: "peak_rss": task.get("peakRss", 0), "read_bytes": task.get("readBytes", 0), "write_bytes": task.get("writeBytes", 0), - "cost": task.get("cost"), + "cost": None, "executor": task.get("executor", ""), "machine_type": task.get("machineType", ""), "cloud_zone": task.get("cloudZone", ""), diff --git a/modules/local/normalize_benchmark_jsonl/tests/test_normalize.py b/modules/local/normalize_benchmark_jsonl/tests/test_normalize.py index d278a17..47d3f20 100644 --- a/modules/local/normalize_benchmark_jsonl/tests/test_normalize.py +++ b/modules/local/normalize_benchmark_jsonl/tests/test_normalize.py @@ -24,7 +24,7 @@ def test_cached_count_extracted(make_run, flat_task): def test_nested_tasks_unwrapped(make_run, nested_task): run = make_run(tasks=[nested_task(cost=2.0), nested_task(cost=3.0)]) rows = extract_tasks([run]) - assert sum(r["cost"] for r in rows) == 5.0 + assert all(r["cost"] is None for r in rows) def test_failed_tasks_filtered(make_run, flat_task): From 60a6f0a908a2526e390124c390fab94d2360603f Mon Sep 17 00:00:00 2001 From: Cursor Agent Date: Fri, 1 May 2026 16:20:55 +0000 Subject: [PATCH 2/3] feat: warn on missing CUR cost coverage Co-authored-by: Florian Wuennemann --- .../bin/benchmark_report_aggregate.py | 80 ++++++++++++- .../tests/test_aggregate.py | 111 ++++++++++++++++++ .../bin/benchmark_report_template.html | 64 ++++++++++ .../tests/test_render.py | 38 ++++++ 4 files changed, 292 insertions(+), 1 deletion(-) diff --git a/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py b/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py index af62477..9cc6f6f 100644 --- a/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py +++ b/modules/local/aggregate_benchmark_report_data/bin/benchmark_report_aggregate.py @@ -73,6 +73,26 @@ def _cost_or_task(cost_row: dict[str, Any] | None, key: str, default: float = 0. return float(value) +def _summarize_missing_processes( + missing_process_counts: dict[str, int], preview_limit: int = 3 +) -> tuple[list[dict[str, Any]], str]: + ordered = sorted(missing_process_counts.items(), key=lambda item: (-item[1], item[0])) + preview = [ + {"process_short": process_short, "missing_tasks": missing_tasks} + for process_short, missing_tasks in ordered[:preview_limit] + ] + parts = [ + f"{item['process_short']} ({item['missing_tasks']})" + if item["missing_tasks"] != 1 + else item["process_short"] + for item in preview + ] + hidden_count = max(len(ordered) - preview_limit, 0) + if hidden_count: + parts.append(f"+{hidden_count} more") + return preview, ", ".join(parts) + + def _is_highlight_process(process: str) -> bool: process_lc = process.lower() return any(keyword in process_lc for keyword in _HIGHLIGHT_KEYWORDS) @@ -240,9 +260,11 @@ def build_report_data(jsonl_dir: Path, include_failed_runs: bool = False) -> dic "unused_cost": 0.0, } + costs_jsonl_path = jsonl_dir / "costs.jsonl" + cur_supplied = costs_jsonl_path.exists() costs_index: dict[tuple[str, str, str], dict[str, Any]] = {} has_cost_rows = False - for c in _iter_jsonl(jsonl_dir / "costs.jsonl"): + for c in _iter_jsonl(costs_jsonl_path): has_cost_rows = True run_id = str(c.get("run_id", "")) process = str(c.get("process", "")) @@ -289,6 +311,10 @@ def build_report_data(jsonl_dir: Path, include_failed_runs: bool = False) -> dic task_run_acc: dict[str, dict[str, float]] = defaultdict( lambda: {"requested_cpu_h": 0.0, "requested_mem_gib_h": 0.0, "real_cpu_h": 0.0, "real_mem_gib_h": 0.0} ) + cost_coverage_runs: dict[tuple[str, str], dict[str, Any]] = {} + total_cost_tasks = 0 + matched_cost_tasks = 0 + missing_cost_tasks = 0 for t in _iter_jsonl(jsonl_dir / "tasks.jsonl"): run_id = str(t.get("run_id", "")) @@ -312,6 +338,29 @@ def build_report_data(jsonl_dir: Path, include_failed_runs: bool = False) -> dic cost_row = _lookup_cost(costs_index, run_id=run_id, process=process, process_short=process_short, hash_short=hash_short) + if cur_supplied: + total_cost_tasks += 1 + coverage = cost_coverage_runs.setdefault( + run_group_key, + { + "run_id": run_id, + "group": group, + "total_tasks": 0, + "matched_tasks": 0, + "missing_tasks": 0, + "missing_process_counts": defaultdict(int), + }, + ) + coverage["total_tasks"] += 1 + if cost_row: + matched_cost_tasks += 1 + coverage["matched_tasks"] += 1 + else: + missing_cost_tasks += 1 + coverage["missing_tasks"] += 1 + missing_process = process_short or process or "unknown" + coverage["missing_process_counts"][missing_process] += 1 + if cost_row: run_cost_acc[run_group_key]["cost"] += _cost_or_task(cost_row, "cost") run_cost_acc[run_group_key]["used_cost"] += _cost_or_task(cost_row, "used_cost") @@ -527,6 +576,34 @@ def build_report_data(jsonl_dir: Path, include_failed_runs: bool = False) -> dic ] cost_overview.sort(key=lambda x: float(x.get("total_cost") or 0), reverse=True) + runs_with_missing_costs = [] + for row in cost_coverage_runs.values(): + if int(row["missing_tasks"]) <= 0: + continue + missing_processes, missing_process_summary = _summarize_missing_processes(row["missing_process_counts"]) + runs_with_missing_costs.append( + { + "run_id": row["run_id"], + "group": row["group"], + "total_tasks": int(row["total_tasks"]), + "matched_tasks": int(row["matched_tasks"]), + "missing_tasks": int(row["missing_tasks"]), + "missing_processes": missing_processes, + "missing_process_summary": missing_process_summary, + } + ) + runs_with_missing_costs.sort(key=lambda row: (-row["missing_tasks"], str(row["group"]), str(row["run_id"]))) + + cost_coverage = { + "cur_supplied": cur_supplied, + "has_any_cost_rows": has_cost_rows, + "total_included_tasks": total_cost_tasks, + "matched_task_count": matched_cost_tasks, + "missing_task_count": missing_cost_tasks, + "coverage_pct": _round((matched_cost_tasks / total_cost_tasks) * 100.0, 1) if total_cost_tasks else None, + "runs_with_missing_costs": runs_with_missing_costs, + } + combined_task_runtime = [] for (pipeline, group), panel_acc in sorted(combined_runtime_acc.items(), key=lambda x: (x[0][0], x[0][1])): process_runtime_ms = panel_acc["process_runtime_ms"] @@ -596,6 +673,7 @@ def build_report_data(jsonl_dir: Path, include_failed_runs: bool = False) -> dic "task_table": task_table, "task_scatter": task_scatter, "cost_overview": cost_overview, + "cost_coverage": cost_coverage, } diff --git a/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py b/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py index b7d0aed..158145e 100644 --- a/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py +++ b/modules/local/aggregate_benchmark_report_data/tests/test_aggregate.py @@ -28,6 +28,7 @@ def test_build_report_data_has_all_sections(tmp_path, make_run, flat_task, write "task_table", "task_scatter", "cost_overview", + "cost_coverage", } @@ -40,6 +41,15 @@ def test_run_costs_without_cur_are_zero(tmp_path, make_run, flat_task, write_run data = build_report_data(jsonl_dir) assert data["run_costs"][0]["cost"] == 0.0 assert data["run_costs"][0]["used_cost"] is None + assert data["cost_coverage"] == { + "cur_supplied": False, + "has_any_cost_rows": False, + "total_included_tasks": 0, + "matched_task_count": 0, + "missing_task_count": 0, + "coverage_pct": None, + "runs_with_missing_costs": [], + } def test_cur_zero_costs_do_not_fall_back_to_task_cost(tmp_path): @@ -101,6 +111,107 @@ def test_cur_zero_costs_do_not_fall_back_to_task_cost(tmp_path): assert data["run_costs"][0]["unused_cost"] == 0.0 assert data["cost_overview"][0]["total_cost"] == 0.0 assert data["cost_overview"][0]["used_cost"] == 0.0 + assert data["cost_coverage"]["cur_supplied"] is True + assert data["cost_coverage"]["coverage_pct"] == 100.0 + assert data["cost_coverage"]["missing_task_count"] == 0 + + +def test_partial_cur_coverage_is_reported_per_run_and_process(tmp_path): + jsonl_dir = tmp_path / "jsonl_bundle" + jsonl_dir.mkdir(parents=True) + + runs = [ + { + "run_id": "run1", + "group": "cpu", + "pipeline": "pipe", + "username": "u", + "pipeline_version": "main", + "nextflow_version": "24.10.0", + "platform_version": "x", + "succeeded": 2, + "failed": 0, + "cached": 0, + "executor": "awsbatch", + "region": "us-east-1", + "fusion_enabled": False, + "wave_enabled": False, + "container_engine": "docker", + "duration_ms": 10, + "cpu_time_ms": 1000, + "cpu_efficiency": 50.0, + "memory_efficiency": 50.0, + "read_bytes": 0, + "write_bytes": 0, + } + ] + tasks = [ + { + "run_id": "run1", + "group": "cpu", + "hash": "ab/cdef12", + "process": "foo:PROC_A", + "process_short": "PROC_A", + "name": "PROC_A", + "status": "COMPLETED", + "staging_ms": 0, + "realtime_ms": 1000, + "duration_ms": 1000, + "cost": None, + }, + { + "run_id": "run1", + "group": "cpu", + "hash": "ab/cdef13", + "process": "foo:PROC_B", + "process_short": "PROC_B", + "name": "PROC_B", + "status": "COMPLETED", + "staging_ms": 0, + "realtime_ms": 1000, + "duration_ms": 1000, + "cost": None, + }, + { + "run_id": "run1", + "group": "cpu", + "hash": "ab/cdef14", + "process": "foo:PROC_B", + "process_short": "PROC_B", + "name": "PROC_B_retry", + "status": "CACHED", + "staging_ms": 0, + "realtime_ms": 1000, + "duration_ms": 1000, + "cost": None, + }, + ] + costs = [ + {"run_id": "run1", "process": "foo:PROC_A", "hash": "abcdef12", "cost": 5.0, "used_cost": 4.0, "unused_cost": 1.0} + ] + + (jsonl_dir / "runs.jsonl").write_text("".join(json.dumps(r) + "\n" for r in runs)) + (jsonl_dir / "tasks.jsonl").write_text("".join(json.dumps(t) + "\n" for t in tasks)) + (jsonl_dir / "costs.jsonl").write_text("".join(json.dumps(c) + "\n" for c in costs)) + + data = build_report_data(jsonl_dir) + + assert data["run_costs"][0]["cost"] == 5.0 + assert data["cost_coverage"]["cur_supplied"] is True + assert data["cost_coverage"]["has_any_cost_rows"] is True + assert data["cost_coverage"]["total_included_tasks"] == 3 + assert data["cost_coverage"]["matched_task_count"] == 1 + assert data["cost_coverage"]["missing_task_count"] == 2 + assert data["cost_coverage"]["coverage_pct"] == 33.3 + + run_warning = data["cost_coverage"]["runs_with_missing_costs"][0] + assert run_warning["run_id"] == "run1" + assert run_warning["group"] == "cpu" + assert run_warning["total_tasks"] == 3 + assert run_warning["matched_tasks"] == 1 + assert run_warning["missing_tasks"] == 2 + assert run_warning["missing_process_summary"] == "PROC_B (2)" + assert run_warning["missing_processes"] == [{"process_short": "PROC_B", "missing_tasks": 2}] def test_task_table_includes_cached(tmp_path, make_run, flat_task, write_run_json): diff --git a/modules/local/render_benchmark_report/bin/benchmark_report_template.html b/modules/local/render_benchmark_report/bin/benchmark_report_template.html index f6a2bc1..10d426b 100644 --- a/modules/local/render_benchmark_report/bin/benchmark_report_template.html +++ b/modules/local/render_benchmark_report/bin/benchmark_report_template.html @@ -53,6 +53,37 @@ .callout-header:hover { background: {{ brand_accent_surface }}; } .callout-body { padding: 12px; display: none; } .callout-body.show { display: block; } + .warning-card { + border: 1px solid #f4b548; + background: #fff8eb; + border-radius: 8px; + padding: 12px 14px; + margin-bottom: 18px; + } + .warning-card h3 { + margin: 0 0 6px; + font-size: 15px; + color: {{ brand_heading }}; + } + .warning-card p { + margin: 0; + font-size: 13px; + color: {{ brand_heading }}; + } + .warning-card ul { + margin: 10px 0 0 18px; + padding: 0; + } + .warning-card li { + margin: 3px 0; + color: {{ brand_heading }}; + } + .warning-card code { + background: rgba(255,255,255,0.65); + border: 1px solid rgba(0,0,0,0.06); + border-radius: 4px; + padding: 0 4px; + } .dl-row { display: flex; gap: 20px; font-size: 13px; margin-bottom: 8px; flex-wrap: wrap; } .dl-row dt { font-weight: 600; color: {{ brand_heading }}; } @@ -230,6 +261,8 @@

Run overview

task, and cost sections only use included runs.

+
+

Run summary

Summary of pipeline execution and infrastructure settings.

@@ -590,6 +623,37 @@

Sav const groupColor = {}; overviewGroups.forEach((g,i) => { groupColor[g] = COLORS[i % COLORS.length]; }); +// 1.1 CUR coverage warning +(function() { + const coverage = DATA.cost_coverage || {}; + if (!coverage.cur_supplied || (coverage.missing_task_count || 0) <= 0) return; + const mount = document.getElementById('cost-coverage-warning'); + if (!mount) return; + + const coveragePct = coverage.coverage_pct != null ? Number(coverage.coverage_pct).toFixed(1) + '%' : '\u2014'; + const rows = (coverage.runs_with_missing_costs || []).slice(0, 6).map(row => { + const total = Number(row.total_tasks || 0); + const missing = Number(row.missing_tasks || 0); + const matched = Number(row.matched_tasks || 0); + const pct = total > 0 ? ((matched / total) * 100).toFixed(0) + '%' : '\u2014'; + return '
  • ' + escapeHtml(row.group || row.run_id || 'run') + ': ' + + missing + ' missing of ' + total + ' task' + (total === 1 ? '' : 's') + + ' (' + pct + ' matched)' + + (row.missing_process_summary ? ' - ' + escapeHtml(row.missing_process_summary) : '') + + '
  • '; + }).join(''); + + mount.innerHTML = + '
    ' + + '

    Warning: CUR coverage is incomplete

    ' + + '

    ' + + 'A CUR file was supplied, but only ' + coveragePct + ' of included tasks matched CUR cost rows. ' + + 'Unmatched tasks stay out of the cost totals, so treat the dollar values as incomplete.' + + '

    ' + + '
      ' + rows + '
    ' + + '
    '; +})(); + // 2.1 Run summary table (with cached tasks) (function() { const runs = DATA.run_summary || []; diff --git a/modules/local/render_benchmark_report/tests/test_render.py b/modules/local/render_benchmark_report/tests/test_render.py index cfc90ed..91bdd21 100644 --- a/modules/local/render_benchmark_report/tests/test_render.py +++ b/modules/local/render_benchmark_report/tests/test_render.py @@ -61,6 +61,8 @@ def test_render_html(tmp_path, minimal_report_data): assert 'id="chart-pg-cpu-savings"' not in text assert 'target="_blank" rel="noopener noreferrer"' in text assert "r.runUrl ? ' Date: Fri, 1 May 2026 16:22:08 +0000 Subject: [PATCH 3/3] test: align CUR coverage render assertions Co-authored-by: Florian Wuennemann --- .../render_benchmark_report/tests/test_render.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/modules/local/render_benchmark_report/tests/test_render.py b/modules/local/render_benchmark_report/tests/test_render.py index 91bdd21..8ad3c46 100644 --- a/modules/local/render_benchmark_report/tests/test_render.py +++ b/modules/local/render_benchmark_report/tests/test_render.py @@ -62,7 +62,7 @@ def test_render_html(tmp_path, minimal_report_data): assert 'target="_blank" rel="noopener noreferrer"' in text assert "r.runUrl ? '