Skip to content

Commit 0638598

Browse files
committed
Add more granularity around 1000ms in the Claim startup metrics
1 parent 24ffa48 commit 0638598

4 files changed

Lines changed: 16 additions & 10 deletions

File tree

dev/load-test/test-recipes/README-rapid-burst.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,12 +44,18 @@ Before running this test, ensure the following prerequisites are met:
4444
args:
4545
- --leader-elect=true
4646
- --extensions
47+
- --enable-pprof-debug
48+
- --enable-tracing
49+
- --zap-log-level=debug
50+
- --zap-encoder=json
4751
- --kube-api-qps=1000
4852
- --kube-api-burst=1000
4953
- --sandbox-concurrent-workers=1000
5054
- --sandbox-claim-concurrent-workers=1000
5155
- --sandbox-warm-pool-concurrent-workers=1000
5256
```
57+
- If you enable tracing (`--enable-tracing`), see [GKE OTLP Metrics](https://docs.cloud.google.com/stackdriver/docs/otlp-metrics/deploy-collector)
58+
for how to deploy the OpenTelemetry collector in your cluster.
5359
- Apply your modified manifests to your cluster to install the agent-sandbox controller.
5460
```bash
5561
cd ~/agent-sandbox

dev/load-test/test-recipes/run_rapid_burst.sh

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,15 @@ if [ -n "$1" ]; then
2020
fi
2121

2222
# BURST_SIZE * TOTAL_BURSTS = Total sandbox claims created
23-
BURST_SIZE=1000
24-
QPS=1000
25-
TOTAL_BURSTS=10
26-
WARMPOOL_SIZE=1000
23+
BURST_SIZE=300
24+
QPS=300
25+
TOTAL_BURSTS=2
26+
WARMPOOL_SIZE=600
2727
RUNTIME_CLASS="" # Change to "gvisor" if your cluster supports it
2828

2929
# Update these paths to match your environment
3030
# Clusterloader2 must be cloned or forked from https://github.com/kubernetes/perf-tests
31-
CL2_DIR="${HOME}/perf-tests/clusterloader2"
31+
CL2_DIR="${HOME}/oss/perf-tests/clusterloader2"
3232
AGENTS_DIR="${HOME}/agent-sandbox"
3333
TEST_DIR="${AGENTS_DIR}/dev/load-test/test-recipes"
3434
TEST_CONFIG="${TEST_DIR}/rapid-burst-test.yaml"

extensions/controllers/sandboxclaim_controller.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -833,7 +833,7 @@ func (r *SandboxClaimReconciler) recordCreationLatencyMetric(
833833
asmetrics.RecordClaimStartupLatency(claim.CreationTimestamp.Time, launchType, claim.Spec.TemplateRef.Name)
834834

835835
// Record controller startup latency
836-
if claim.Annotations != nil && claim.Annotations[observabilityAnnotation] != "" {
836+
if claim.Annotations[observabilityAnnotation] != "" {
837837
observedTimeString := claim.Annotations[observabilityAnnotation]
838838
observedTime, err := time.Parse(time.RFC3339Nano, observedTimeString)
839839
if err != nil {

internal/metrics/metrics.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,8 @@ var (
3737
prometheus.HistogramOpts{
3838
Name: "agent_sandbox_claim_startup_latency_ms",
3939
Help: "End-to-end latency from SandboxClaim creation to Sandbox Ready state in milliseconds.",
40-
// Buckets for latency from 50ms to 4 minutes
41-
Buckets: []float64{50, 100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000, 240000},
40+
// Buckets (in milliseconds) from 100ms to 4 minutes, with finer steps around 1000ms
41+
Buckets: []float64{100, 250, 500, 750, 1000, 1250, 1500, 2000, 2500, 5000, 10000, 30000, 60000, 120000, 240000},
4242
},
4343
[]string{"launch_type", "sandbox_template"},
4444
)
@@ -51,8 +51,8 @@ var (
5151
prometheus.HistogramOpts{
5252
Name: "agent_sandbox_claim_controller_startup_latency_ms",
5353
Help: "Latency from controller first observed SandboxClaim to Sandbox Ready state in milliseconds.",
54-
// Buckets for latency from 50ms to 4 minutes
55-
Buckets: []float64{50, 100, 250, 500, 1000, 2500, 5000, 10000, 30000, 60000, 120000, 240000},
54+
// Buckets (in milliseconds) from 100ms to 4 minutes, with finer steps around 1000ms
55+
Buckets: []float64{100, 250, 500, 750, 1000, 1250, 1500, 2000, 2500, 5000, 10000, 30000, 60000, 120000, 240000},
5656
},
5757
[]string{"launch_type", "sandbox_template"},
5858
)

0 commit comments

Comments
 (0)