Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions histogram.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"sort"
"sync"
"sync/atomic"
"time"
)

const maxSchema = 8
Expand Down Expand Up @@ -214,6 +215,56 @@ type Params struct {
ErrorBound float64
}

// Common Params presets. The bucket tiers are modeled after CockroachDB's
// pkg/util/metric/histogram_buckets.go and the Prometheus DefBuckets.
//
// The time-based presets take values in nanoseconds, which is exactly how
// Go's time.Duration is represented: record with int64(duration).
var (
	// HiResLatencyParams is a high-resolution latency preset spanning 1us
	// to 5m. Use it for end-to-end request latencies that need visibility
	// into both sub-millisecond fast paths and slow tail outliers.
	HiResLatencyParams = Params{
		Lo: float64(1 * time.Microsecond),
		Hi: 5 * float64(time.Minute),
	}

	// IOLatencyParams is a fast-I/O preset spanning 10us to 10s. Use it
	// for RPC latencies, raft operations, disk I/O, and network
	// round-trips.
	IOLatencyParams = Params{
		Lo: 10 * float64(time.Microsecond),
		Hi: 10 * float64(time.Second),
	}

	// ResponseTimeParams is a request/response preset spanning 1ms to 30s.
	// Use it for SQL query execution, HTTP handlers, and API response
	// times.
	ResponseTimeParams = Params{
		Lo: float64(1 * time.Millisecond),
		Hi: 30 * float64(time.Second),
	}

	// LongRunningParams is a preset for long-running operations spanning
	// 500ms to 1h. Use it for backups, restores, migrations, and bulk
	// ingestion jobs.
	LongRunningParams = Params{
		Lo: 500 * float64(time.Millisecond),
		Hi: float64(1 * time.Hour),
	}

	// DataSizeParams is a payload-size preset spanning 1KB to 16MB, in
	// bytes. Use it for message sizes, request/response bodies, and SST
	// sizes.
	DataSizeParams = Params{
		Lo: 1 << 10,
		Hi: 16 << 20,
	}

	// MemoryUsageParams is a memory-tracking preset spanning 1B to 64MB,
	// in bytes. Use it for memory allocations, buffer sizes, and cache
	// entry sizes.
	MemoryUsageParams = Params{
		Lo: 1,
		Hi: 64 << 20,
	}
)

func (p Params) withDefaults() Params {
if p.Lo == 0 {
p.Lo = 1
Expand Down
141 changes: 141 additions & 0 deletions histogram_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,147 @@ func TestQuantileEdgeCases(t *testing.T) {
})
}

func TestValuesAtQuantiles(t *testing.T) {
t.Run("matches individual calls", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1e6, ErrorBound: 0.05})
rng := rand.New(rand.NewSource(42))
for i := 0; i < 10000; i++ {
h.Record(int64(rng.Float64()*999999) + 1)
}
snap := h.Snapshot()
qs := []float64{0, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 0.999, 1.0}
batch := snap.ValuesAtQuantiles(qs)
for i, q := range qs {
single := snap.ValueAtQuantile(q)
require.InDeltaf(t, single, batch[i], 1e-9,
"q=%.3f: single=%.6f batch=%.6f", q, single, batch[i])
}
})

t.Run("preserves input order", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
for i := int64(1); i <= 1000; i++ {
h.Record(i)
}
snap := h.Snapshot()
// Deliberately unsorted quantiles.
qs := []float64{0.99, 0.25, 0.75, 0.50, 0.10}
batch := snap.ValuesAtQuantiles(qs)
for i, q := range qs {
single := snap.ValueAtQuantile(q)
require.InDeltaf(t, single, batch[i], 1e-9,
"q=%.2f at index %d", q, i)
}
})

t.Run("empty histogram", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
snap := h.Snapshot()
results := snap.ValuesAtQuantiles([]float64{0, 0.5, 1.0})
for i, r := range results {
require.Equalf(t, 0.0, r, "index %d", i)
}
})

t.Run("empty quantiles slice", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
h.Record(500)
snap := h.Snapshot()
require.Empty(t, snap.ValuesAtQuantiles([]float64{}))
require.Empty(t, snap.ValuesAtQuantiles(nil))
})

t.Run("single quantile", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
for i := int64(1); i <= 100; i++ {
h.Record(i)
}
snap := h.Snapshot()
batch := snap.ValuesAtQuantiles([]float64{0.50})
single := snap.ValueAtQuantile(0.50)
require.InDelta(t, single, batch[0], 1e-9)
})

t.Run("all zeros", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
for i := 0; i < 100; i++ {
h.Record(0)
}
snap := h.Snapshot()
qs := []float64{0, 0.50, 0.99, 1.0}
batch := snap.ValuesAtQuantiles(qs)
for i, q := range qs {
single := snap.ValueAtQuantile(q)
require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
}
})

t.Run("all underflow", func(t *testing.T) {
h := New(Params{Lo: 100, Hi: 10000, ErrorBound: 0.05})
for i := int64(1); i <= 50; i++ {
h.Record(i)
}
snap := h.Snapshot()
qs := []float64{0, 0.50, 0.99, 1.0}
batch := snap.ValuesAtQuantiles(qs)
for i, q := range qs {
single := snap.ValueAtQuantile(q)
require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
}
})

t.Run("all overflow", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 100, ErrorBound: 0.05})
for i := int64(200); i <= 300; i++ {
h.Record(i)
}
snap := h.Snapshot()
qs := []float64{0, 0.50, 0.99, 1.0}
batch := snap.ValuesAtQuantiles(qs)
for i, q := range qs {
single := snap.ValueAtQuantile(q)
require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
}
})

t.Run("mixed underflow overflow and in-range", func(t *testing.T) {
h := New(Params{Lo: 100, Hi: 10000, ErrorBound: 0.05})
for i := 0; i < 10; i++ {
h.Record(0)
}
for i := int64(1); i <= 10; i++ {
h.Record(i)
}
for i := int64(500); i <= 559; i++ {
h.Record(i)
}
for i := int64(20000); i <= 20019; i++ {
h.Record(i)
}
snap := h.Snapshot()
qs := []float64{0, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 1.0}
batch := snap.ValuesAtQuantiles(qs)
for i, q := range qs {
single := snap.ValueAtQuantile(q)
require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
}
})

t.Run("duplicate quantiles", func(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
for i := int64(1); i <= 100; i++ {
h.Record(i)
}
snap := h.Snapshot()
qs := []float64{0.50, 0.50, 0.50}
batch := snap.ValuesAtQuantiles(qs)
single := snap.ValueAtQuantile(0.50)
for i := range qs {
require.InDeltaf(t, single, batch[i], 1e-9, "index %d", i)
}
})
}

func TestMeanAndTotal(t *testing.T) {
h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
h.Record(100)
Expand Down
136 changes: 135 additions & 1 deletion quantile.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@

package goodhistogram

import "math"
import (
"math"
"sort"
)

// ValueAtQuantile returns the estimated value at the given quantile q ∈ [0, 1]
// using trapezoidal interpolation.
Expand Down Expand Up @@ -130,6 +133,137 @@ func (s *Snapshot) ValueAtQuantile(q float64) float64 {
return s.cfg.boundaries[n]
}

// ValuesAtQuantiles returns the estimated values at the given quantiles
// in a single pass through the bucket array. The input quantiles should be
// in [0, 1]. Results are returned in the same order as the input quantiles.
//
// This is more efficient than calling ValueAtQuantile in a loop because it
// computes bucket densities once and walks the bucket array once regardless
// of how many quantiles are requested.
func (s *Snapshot) ValuesAtQuantiles(qs []float64) []float64 {
	results := make([]float64, len(qs))
	// No quantiles requested, or nothing recorded: every result stays 0.
	if len(qs) == 0 || s.TotalCount == 0 {
		return results
	}

	// belowLo counts observations that sort before the first bucket
	// (exact zeros plus underflow); inRangeCount is the mass inside the
	// tracked bucket range.
	belowLo := float64(s.ZeroCount + s.Underflow)
	var inRangeCount uint64
	for _, c := range s.Counts {
		inRangeCount += c
	}

	// Resolve edge cases per-quantile and collect those needing a bucket walk.
	type walkEntry struct {
		idx  int     // index in results
		rank float64 // adjusted rank within in-range buckets
	}
	var walk []walkEntry
	for i, q := range qs {
		// rank is the absolute rank of quantile q among all observations.
		rank := q * float64(s.TotalCount)
		if rank <= 0 {
			// q <= 0: report the smallest representable observation —
			// cfg.lo when anything sits below the range, otherwise the
			// lower boundary of the first non-empty bucket (cfg.hi is
			// only the fallback when all in-range buckets are empty).
			if s.ZeroCount+s.Underflow > 0 {
				results[i] = s.cfg.lo
			} else {
				results[i] = s.cfg.hi
				for j, c := range s.Counts {
					if c > 0 {
						results[i] = s.cfg.boundaries[j]
						break
					}
				}
			}
			continue
		}
		if rank >= float64(s.TotalCount) {
			// q >= 1: report the largest representable observation —
			// cfg.hi when anything overflowed, otherwise the upper
			// boundary of the last non-empty bucket.
			if s.Overflow > 0 {
				results[i] = s.cfg.hi
			} else {
				results[i] = s.cfg.lo
				for j := len(s.Counts) - 1; j >= 0; j-- {
					if s.Counts[j] > 0 {
						results[i] = s.cfg.boundaries[j+1]
						break
					}
				}
			}
			continue
		}
		// Rank falls entirely within the zero/underflow mass: clamp to lo.
		if rank <= belowLo {
			results[i] = s.cfg.lo
			continue
		}
		// Shift the rank so it is relative to the in-range buckets only.
		adjusted := rank - belowLo
		// Rank falls within the overflow mass: clamp to hi.
		if adjusted > float64(inRangeCount) {
			results[i] = s.cfg.hi
			continue
		}
		walk = append(walk, walkEntry{idx: i, rank: adjusted})
	}

	// Every quantile was an edge case; no bucket walk needed.
	if len(walk) == 0 {
		return results
	}

	// Sort by rank for a single ascending pass through the buckets.
	// (Results are still written back via idx, preserving input order.)
	sort.Slice(walk, func(i, j int) bool {
		return walk[i].rank < walk[j].rank
	})

	n := len(s.Counts)

	// Compute densities once (same as ValueAtQuantile).
	// avgDensity[i] is count-per-unit-width of bucket i; left at 0 for
	// empty or zero-width buckets.
	avgDensity := make([]float64, n)
	for i := range n {
		w := s.cfg.boundaries[i+1] - s.cfg.boundaries[i]
		if w > 0 && s.Counts[i] > 0 {
			avgDensity[i] = float64(s.Counts[i]) / w
		}
	}
	// boundaryDensity[i] is the interpolated density at boundary i: the
	// first bucket's own density at index 0, the mean of the two adjacent
	// bucket densities elsewhere.
	// NOTE(review): boundaryDensity[n] (the top boundary) is never
	// assigned and stays 0, so the last bucket tapers to zero on its
	// right edge — presumably mirroring ValueAtQuantile; confirm there.
	boundaryDensity := make([]float64, n+1)
	for i := range n {
		switch i {
		case 0:
			boundaryDensity[i] = avgDensity[0]
		default:
			boundaryDensity[i] = (avgDensity[i-1] + avgDensity[i]) / 2.0
		}
	}

	// Single-pass bucket walk: process all quantiles whose rank falls
	// within each bucket before advancing to the next.
	var cumCount float64
	wi := 0
	for i := range n {
		fc := float64(s.Counts[i])
		nextCum := cumCount + fc
		// walk is sorted ascending, so every entry with rank <= nextCum
		// lands in this bucket.
		for wi < len(walk) && nextCum >= walk[wi].rank {
			// localRank is the rank measured from this bucket's left edge.
			localRank := walk[wi].rank - cumCount
			lo := s.cfg.boundaries[i]
			hi := s.cfg.boundaries[i+1]
			w := hi - lo
			if w <= 0 || fc == 0 {
				// Degenerate bucket: no width or no mass to interpolate.
				results[walk[wi].idx] = lo
			} else {
				// Invert the trapezoidal density profile to find the
				// value at localRank within this bucket.
				dL := boundaryDensity[i]
				dR := boundaryDensity[i+1]
				results[walk[wi].idx] = trapezoidalSolve(lo, w, fc, dL, dR, localRank)
			}
			wi++
		}
		cumCount = nextCum
		// All requested ranks resolved; skip the remaining buckets.
		if wi >= len(walk) {
			break
		}
	}

	// Any remaining entries (shouldn't happen, but safety).
	for ; wi < len(walk); wi++ {
		results[walk[wi].idx] = s.cfg.boundaries[n]
	}

	return results
}

// Mean returns the mean of all recorded values. Returns NaN if no
// observations have been recorded, matching Prometheus histogram behavior.
func (s *Snapshot) Mean() float64 {
Expand Down
Loading