Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 31 additions & 22 deletions internal/runtime/executor/usage_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,7 @@ func (r *usageReporter) publishWithOutcome(ctx context.Context, detail usage.Det
return
}
r.once.Do(func() {
usage.PublishRecord(ctx, usage.Record{
Provider: r.provider,
Model: r.model,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Failed: failed,
Detail: detail,
})
usage.PublishRecord(ctx, r.buildRecord(detail, failed))
})
}

Expand All @@ -96,20 +86,39 @@ func (r *usageReporter) ensurePublished(ctx context.Context) {
return
}
r.once.Do(func() {
usage.PublishRecord(ctx, usage.Record{
Provider: r.provider,
Model: r.model,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Failed: false,
Detail: usage.Detail{},
})
usage.PublishRecord(ctx, r.buildRecord(usage.Detail{}, false))
})
}

func (r *usageReporter) buildRecord(detail usage.Detail, failed bool) usage.Record {
if r == nil {
return usage.Record{Detail: detail, Failed: failed}
}
return usage.Record{
Provider: r.provider,
Model: r.model,
Source: r.source,
APIKey: r.apiKey,
AuthID: r.authID,
AuthIndex: r.authIndex,
RequestedAt: r.requestedAt,
Latency: r.latency(),

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Capture latency after stream completion

Latency is fixed at the first call to buildRecord via Latency: r.latency(), but several streaming executors publish usage as soon as an intermediate usage event arrives (before the stream is finished). Since publishWithOutcome is protected by once.Do, long streams keep that early value and systematically under-report end-to-end request latency, making the new metric misleading for operational monitoring.

Useful? React with 👍 / 👎.

Failed: failed,
Detail: detail,
}
}

// latency reports the elapsed time since the request started, clamped so it
// is never negative. It returns 0 when the receiver is nil or no start time
// was recorded, making it safe to call unconditionally.
func (r *usageReporter) latency() time.Duration {
	if r == nil || r.requestedAt.IsZero() {
		return 0
	}
	if elapsed := time.Since(r.requestedAt); elapsed > 0 {
		return elapsed
	}
	return 0
}

func apiKeyFromContext(ctx context.Context) string {
if ctx == nil {
return ""
Expand Down
23 changes: 22 additions & 1 deletion internal/runtime/executor/usage_helpers_test.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
package executor

import "testing"
import (
"testing"
"time"

"github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
)

func TestParseOpenAIUsageChatCompletions(t *testing.T) {
data := []byte(`{"usage":{"prompt_tokens":1,"completion_tokens":2,"total_tokens":3,"prompt_tokens_details":{"cached_tokens":4},"completion_tokens_details":{"reasoning_tokens":5}}}`)
Expand Down Expand Up @@ -41,3 +46,19 @@ func TestParseOpenAIUsageResponses(t *testing.T) {
t.Fatalf("reasoning tokens = %d, want %d", detail.ReasoningTokens, 9)
}
}

// TestUsageReporterBuildRecordIncludesLatency verifies that buildRecord
// derives latency from requestedAt: a reporter started ~1.5s in the past
// must produce a latency within the [1s, 3s] window.
func TestUsageReporterBuildRecordIncludesLatency(t *testing.T) {
	r := &usageReporter{
		provider:    "openai",
		model:       "gpt-5.4",
		requestedAt: time.Now().Add(-1500 * time.Millisecond),
	}

	record := r.buildRecord(usage.Detail{TotalTokens: 3}, false)
	switch {
	case record.Latency < time.Second:
		t.Fatalf("latency = %v, want >= 1s", record.Latency)
	case record.Latency > 3*time.Second:
		t.Fatalf("latency = %v, want <= 3s", record.Latency)
	}
}
14 changes: 13 additions & 1 deletion internal/usage/logger_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,10 @@ type modelStats struct {
Details []RequestDetail
}

// RequestDetail stores the timestamp and token usage for a single request.
// RequestDetail stores the timestamp, latency, and token usage for a single request.
type RequestDetail struct {
Timestamp time.Time `json:"timestamp"`
LatencyMs int64 `json:"latency_ms"`
Source string `json:"source"`
AuthIndex string `json:"auth_index"`
Tokens TokenStats `json:"tokens"`
Expand Down Expand Up @@ -198,6 +199,7 @@ func (s *RequestStatistics) Record(ctx context.Context, record coreusage.Record)
}
s.updateAPIStats(stats, modelName, RequestDetail{
Timestamp: timestamp,
LatencyMs: normaliseLatency(record.Latency),
Source: record.Source,
AuthIndex: record.AuthIndex,
Tokens: detail,
Expand Down Expand Up @@ -332,6 +334,9 @@ func (s *RequestStatistics) MergeSnapshot(snapshot StatisticsSnapshot) MergeResu
}
for _, detail := range modelSnapshot.Details {
detail.Tokens = normaliseTokenStats(detail.Tokens)
if detail.LatencyMs < 0 {
detail.LatencyMs = 0
}
if detail.Timestamp.IsZero() {
detail.Timestamp = time.Now()
}
Expand Down Expand Up @@ -463,6 +468,13 @@ func normaliseTokenStats(tokens TokenStats) TokenStats {
return tokens
}

func normaliseLatency(latency time.Duration) int64 {
if latency <= 0 {
return 0
}
return latency.Milliseconds()
}

func formatHour(hour int) string {
if hour < 0 {
hour = 0
Expand Down
96 changes: 96 additions & 0 deletions internal/usage/logger_plugin_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
package usage

import (
"context"
"testing"
"time"

coreusage "github.com/router-for-me/CLIProxyAPI/v6/sdk/cliproxy/usage"
)

// TestRequestStatisticsRecordIncludesLatency verifies that Record converts
// the core usage record's latency into the persisted latency_ms field.
func TestRequestStatisticsRecordIncludesLatency(t *testing.T) {
	s := NewRequestStatistics()
	rec := coreusage.Record{
		APIKey:      "test-key",
		Model:       "gpt-5.4",
		RequestedAt: time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC),
		Latency:     1500 * time.Millisecond,
		Detail: coreusage.Detail{
			InputTokens:  10,
			OutputTokens: 20,
			TotalTokens:  30,
		},
	}
	s.Record(context.Background(), rec)

	got := s.Snapshot().APIs["test-key"].Models["gpt-5.4"].Details
	if len(got) != 1 {
		t.Fatalf("details len = %d, want 1", len(got))
	}
	if got[0].LatencyMs != 1500 {
		t.Fatalf("latency_ms = %d, want 1500", got[0].LatencyMs)
	}
}

// TestRequestStatisticsMergeSnapshotDedupIgnoresLatency verifies that two
// snapshots identical in every field except LatencyMs dedupe to a single
// detail: the second merge is skipped rather than added.
func TestRequestStatisticsMergeSnapshotDedupIgnoresLatency(t *testing.T) {
	stats := NewRequestStatistics()
	timestamp := time.Date(2026, 3, 20, 12, 0, 0, 0, time.UTC)

	// snapshotWithLatency builds snapshots that differ ONLY in latency, so
	// the test makes explicit that latency is excluded from dedup identity.
	snapshotWithLatency := func(latencyMs int64) StatisticsSnapshot {
		return StatisticsSnapshot{
			APIs: map[string]APISnapshot{
				"test-key": {
					Models: map[string]ModelSnapshot{
						"gpt-5.4": {
							Details: []RequestDetail{{
								Timestamp: timestamp,
								LatencyMs: latencyMs,
								Source:    "[email protected]",
								AuthIndex: "0",
								Tokens: TokenStats{
									InputTokens:  10,
									OutputTokens: 20,
									TotalTokens:  30,
								},
							}},
						},
					},
				},
			},
		}
	}

	result := stats.MergeSnapshot(snapshotWithLatency(0))
	if result.Added != 1 || result.Skipped != 0 {
		t.Fatalf("first merge = %+v, want added=1 skipped=0", result)
	}

	result = stats.MergeSnapshot(snapshotWithLatency(2500))
	if result.Added != 0 || result.Skipped != 1 {
		t.Fatalf("second merge = %+v, want added=0 skipped=1", result)
	}

	snapshot := stats.Snapshot()
	details := snapshot.APIs["test-key"].Models["gpt-5.4"].Details
	if len(details) != 1 {
		t.Fatalf("details len = %d, want 1", len(details))
	}
}
Comment on lines +35 to +96
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The setup for first and second StatisticsSnapshot instances involves a lot of duplicated code, making the test harder to read and maintain. To improve this, you could create a helper function that builds the StatisticsSnapshot and takes the differing values (like LatencyMs) as parameters. This would reduce boilerplate and make the core logic of the test—what is actually different between the two snapshots—more apparent.

1 change: 1 addition & 0 deletions sdk/cliproxy/usage/manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type Record struct {
AuthIndex string
Source string
RequestedAt time.Time
Latency time.Duration
Failed bool
Detail Detail
}
Expand Down
Loading