Commit f0e965c

Merge pull request #246 from whywaita/feat/metrisc-exponential-backoff
Add metrics about exponential backoff
2 parents f18ad79 + 6ee4998 commit f0e965c

File tree

4 files changed: +60 -1 lines changed

pkg/runner/metrics.go

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+package runner
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+var (
+	// DeleteRunnerBackoffDuration is histogram of exponential backoff duration for deleting runner
+	DeleteRunnerBackoffDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: "myshoes",
+		Subsystem: "runner",
+		Name:      "delete_runner_backoff_duration_seconds",
+		Help:      "Histogram of exponential backoff duration in seconds for deleting runner",
+		Buckets:   prometheus.ExponentialBuckets(1, 2, 10), // 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s, 512s
+	}, []string{"runner_uuid"})
+
+	// DeleteRunnerRetryTotal is counter of total retries for deleting runner
+	DeleteRunnerRetryTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "myshoes",
+		Subsystem: "runner",
+		Name:      "delete_runner_retry_total",
+		Help:      "Total number of retries for deleting runner",
+	}, []string{"runner_uuid"})
+)

pkg/runner/runner_delete.go

Lines changed: 6 additions & 1 deletion
@@ -118,7 +118,12 @@ func (m *Manager) removeRunners(ctx context.Context, t datastore.Target) error {
 			sem.Release(1)
 			ConcurrencyDeleting.Add(-1)
 		}()
-		time.Sleep(util.CalcRetryTime(count))
+		sleep := util.CalcRetryTime(count)
+		if count > 0 {
+			DeleteRunnerRetryTotal.WithLabelValues(runner.UUID.String()).Inc()
+			DeleteRunnerBackoffDuration.WithLabelValues(runner.UUID.String()).Observe(sleep.Seconds())
+		}
+		time.Sleep(sleep)
 
 		if err := m.removeRunner(cctx, t, runner, ghRunners); err != nil {
 			DeleteRetryCount.Store(runner.UUID, count+1)
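
Note: the backoff interval itself comes from util.CalcRetryTime, whose implementation is not part of this diff. As a rough, assumed sketch (not the actual myshoes code), it presumably grows the sleep exponentially with the retry count, which is what the ExponentialBuckets(1, 2, 10) buckets above are sized for; the clamp below is illustrative only:

package util

import "time"

// CalcRetryTime sketch (assumption, not the real implementation):
// returns an exponentially growing sleep based on the retry count.
func CalcRetryTime(count int) time.Duration {
	if count <= 0 {
		return 0 // first attempt: no backoff, so the retry metrics above are skipped
	}
	if count > 10 {
		count = 10 // hypothetical clamp; keeps the shift below from overflowing
	}
	return time.Duration(1<<uint(count-1)) * time.Second // 1s, 2s, 4s, ... 512s
}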

pkg/starter/metrics.go

Lines changed: 25 additions & 0 deletions
@@ -0,0 +1,25 @@
+package starter
+
+import (
+	"github.com/prometheus/client_golang/prometheus"
+	"github.com/prometheus/client_golang/prometheus/promauto"
+)
+
+var (
+	// AddInstanceBackoffDuration is histogram of exponential backoff duration for adding instance
+	AddInstanceBackoffDuration = promauto.NewHistogramVec(prometheus.HistogramOpts{
+		Namespace: "myshoes",
+		Subsystem: "starter",
+		Name:      "add_instance_backoff_duration_seconds",
+		Help:      "Histogram of exponential backoff duration in seconds for adding instance",
+		Buckets:   prometheus.ExponentialBuckets(1, 2, 10), // 1s, 2s, 4s, 8s, 16s, 32s, 64s, 128s, 256s, 512s
+	}, []string{"job_uuid"})
+
+	// AddInstanceRetryTotal is counter of total retries for adding instance
+	AddInstanceRetryTotal = promauto.NewCounterVec(prometheus.CounterOpts{
+		Namespace: "myshoes",
+		Subsystem: "starter",
+		Name:      "add_instance_retry_total",
+		Help:      "Total number of retries for adding instance",
+	}, []string{"job_uuid"})
+)

pkg/starter/starter.go

Lines changed: 4 additions & 0 deletions
@@ -163,6 +163,10 @@ func (s *Starter) run(ctx context.Context, ch chan datastore.Job) error {
 		inProgress.Store(job.UUID, struct{}{})
 
 		sleep := util.CalcRetryTime(count)
+		if count > 0 {
+			AddInstanceRetryTotal.WithLabelValues(job.UUID.String()).Inc()
+			AddInstanceBackoffDuration.WithLabelValues(job.UUID.String()).Observe(sleep.Seconds())
+		}
 		go func(job datastore.Job, sleep time.Duration) {
 			defer func() {
 				sem.Release(1)
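
To sanity-check that the count > 0 guard only records on retries, the new collectors can be exercised directly with client_golang's testutil package. This is an illustrative sketch, not part of the commit; the test name and UUID label value are made up:

package starter

import (
	"testing"

	"github.com/prometheus/client_golang/prometheus/testutil"
)

// TestRetryMetrics (hypothetical) simulates one retry and checks the collectors.
func TestRetryMetrics(t *testing.T) {
	const jobUUID = "00000000-0000-0000-0000-000000000000" // made-up label value

	// Mirror the count > 0 branch in run(): one retry with a 2s backoff.
	AddInstanceRetryTotal.WithLabelValues(jobUUID).Inc()
	AddInstanceBackoffDuration.WithLabelValues(jobUUID).Observe(2.0)

	if got := testutil.ToFloat64(AddInstanceRetryTotal.WithLabelValues(jobUUID)); got != 1 {
		t.Errorf("retry counter = %v, want 1", got)
	}
	// One histogram series should now exist for the label.
	if n := testutil.CollectAndCount(AddInstanceBackoffDuration); n != 1 {
		t.Errorf("backoff histogram series = %d, want 1", n)
	}
}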
