diff --git a/histogram.go b/histogram.go
index 6751294..485e9ac 100644
--- a/histogram.go
+++ b/histogram.go
@@ -27,6 +27,7 @@ import (
 	"sort"
 	"sync"
 	"sync/atomic"
+	"time"
 )
 
 const maxSchema = 8
@@ -214,6 +215,56 @@ type Params struct {
 	ErrorBound float64
 }
 
+// Common Params presets, modeled after the bucket tiers in CockroachDB's
+// pkg/util/metric/histogram_buckets.go and Prometheus DefBuckets.
+//
+// Time-based presets expect values in nanoseconds, matching Go's
+// time.Duration. Record with int64(duration).
+var (
+	// HiResLatencyParams covers high-resolution latency from 1us to 5m.
+	// Use for: end-to-end request latencies where you need visibility into
+	// both fast-path sub-millisecond operations and slow tail outliers.
+	HiResLatencyParams = Params{
+		Lo: float64(time.Microsecond),
+		Hi: float64(5 * time.Minute),
+	}
+
+	// IOLatencyParams covers fast I/O operations from 10us to 10s.
+	// Use for: RPC latencies, raft operations, disk I/O, network round-trips.
+	IOLatencyParams = Params{
+		Lo: float64(10 * time.Microsecond),
+		Hi: float64(10 * time.Second),
+	}
+
+	// ResponseTimeParams covers request/response latencies from 1ms to 30s.
+	// Use for: SQL query execution, HTTP handlers, API response times.
+	ResponseTimeParams = Params{
+		Lo: float64(time.Millisecond),
+		Hi: float64(30 * time.Second),
+	}
+
+	// LongRunningParams covers long-running operations from 500ms to 1h.
+	// Use for: backups, restores, migrations, bulk ingestion jobs.
+	LongRunningParams = Params{
+		Lo: float64(500 * time.Millisecond),
+		Hi: float64(time.Hour),
+	}
+
+	// DataSizeParams covers data payload sizes from 1KB to 16MB (in bytes).
+	// Use for: message sizes, request/response bodies, SST sizes.
+	DataSizeParams = Params{
+		Lo: 1024,
+		Hi: 16 * 1024 * 1024,
+	}
+
+	// MemoryUsageParams covers memory tracking from 1B to 64MB (in bytes).
+	// Use for: memory allocations, buffer sizes, cache entry sizes.
+	MemoryUsageParams = Params{
+		Lo: 1,
+		Hi: 64 * 1024 * 1024,
+	}
+)
+
 func (p Params) withDefaults() Params {
 	if p.Lo == 0 {
 		p.Lo = 1
diff --git a/histogram_test.go b/histogram_test.go
index 3cec5ba..3e4e87d 100644
--- a/histogram_test.go
+++ b/histogram_test.go
@@ -463,6 +463,147 @@ func TestQuantileEdgeCases(t *testing.T) {
 	})
 }
 
+func TestValuesAtQuantiles(t *testing.T) {
+	t.Run("matches individual calls", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1e6, ErrorBound: 0.05})
+		rng := rand.New(rand.NewSource(42))
+		for i := 0; i < 10000; i++ {
+			h.Record(int64(rng.Float64()*999999) + 1)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 0.999, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9,
+				"q=%.3f: single=%.6f batch=%.6f", q, single, batch[i])
+		}
+	})
+
+	t.Run("preserves input order", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 1000; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		// Deliberately unsorted quantiles.
+		qs := []float64{0.99, 0.25, 0.75, 0.50, 0.10}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9,
+				"q=%.2f at index %d", q, i)
+		}
+	})
+
+	t.Run("empty histogram", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		snap := h.Snapshot()
+		results := snap.ValuesAtQuantiles([]float64{0, 0.5, 1.0})
+		for i, r := range results {
+			require.Equalf(t, 0.0, r, "index %d", i)
+		}
+	})
+
+	t.Run("empty quantiles slice", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		h.Record(500)
+		snap := h.Snapshot()
+		require.Empty(t, snap.ValuesAtQuantiles([]float64{}))
+		require.Empty(t, snap.ValuesAtQuantiles(nil))
+	})
+
+	t.Run("single quantile", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 100; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		batch := snap.ValuesAtQuantiles([]float64{0.50})
+		single := snap.ValueAtQuantile(0.50)
+		require.InDelta(t, single, batch[0], 1e-9)
+	})
+
+	t.Run("all zeros", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := 0; i < 100; i++ {
+			h.Record(0)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.50, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("all underflow", func(t *testing.T) {
+		h := New(Params{Lo: 100, Hi: 10000, ErrorBound: 0.05})
+		for i := int64(1); i <= 50; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.50, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("all overflow", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 100, ErrorBound: 0.05})
+		for i := int64(200); i <= 300; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.50, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("mixed underflow overflow and in-range", func(t *testing.T) {
+		h := New(Params{Lo: 100, Hi: 10000, ErrorBound: 0.05})
+		for i := 0; i < 10; i++ {
+			h.Record(0)
+		}
+		for i := int64(1); i <= 10; i++ {
+			h.Record(i)
+		}
+		for i := int64(500); i <= 559; i++ {
+			h.Record(i)
+		}
+		for i := int64(20000); i <= 20019; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("duplicate quantiles", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 100; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0.50, 0.50, 0.50}
+		batch := snap.ValuesAtQuantiles(qs)
+		single := snap.ValueAtQuantile(0.50)
+		for i := range qs {
+			require.InDeltaf(t, single, batch[i], 1e-9, "index %d", i)
+		}
+	})
+}
+
 func TestMeanAndTotal(t *testing.T) {
 	h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
 	h.Record(100)
diff --git a/quantile.go b/quantile.go
index f768402..8eb5534 100644
--- a/quantile.go
+++ b/quantile.go
@@ -8,7 +8,10 @@
 
 package goodhistogram
 
-import "math"
+import (
+	"math"
+	"sort"
+)
 
 // ValueAtQuantile returns the estimated value at the given quantile q ∈ [0, 1]
 // using trapezoidal interpolation.
@@ -130,6 +133,137 @@ func (s *Snapshot) ValueAtQuantile(q float64) float64 {
 	return s.cfg.boundaries[n]
 }
 
+// ValuesAtQuantiles returns the estimated values at the given quantiles
+// in a single pass through the bucket array. The input quantiles should be
+// in [0, 1]. Results are returned in the same order as the input quantiles.
+//
+// This is more efficient than calling ValueAtQuantile in a loop because it
+// computes bucket densities once and walks the bucket array once regardless
+// of how many quantiles are requested.
+func (s *Snapshot) ValuesAtQuantiles(qs []float64) []float64 {
+	results := make([]float64, len(qs))
+	if len(qs) == 0 || s.TotalCount == 0 {
+		return results
+	}
+
+	belowLo := float64(s.ZeroCount + s.Underflow)
+	var inRangeCount uint64
+	for _, c := range s.Counts {
+		inRangeCount += c
+	}
+
+	// Resolve edge cases per-quantile and collect those needing a bucket walk.
+	type walkEntry struct {
+		idx  int     // index in results
+		rank float64 // adjusted rank within in-range buckets
+	}
+	var walk []walkEntry
+	for i, q := range qs {
+		rank := q * float64(s.TotalCount)
+		if rank <= 0 {
+			if s.ZeroCount+s.Underflow > 0 {
+				results[i] = s.cfg.lo
+			} else {
+				results[i] = s.cfg.hi
+				for j, c := range s.Counts {
+					if c > 0 {
+						results[i] = s.cfg.boundaries[j]
+						break
+					}
+				}
+			}
+			continue
+		}
+		if rank >= float64(s.TotalCount) {
+			if s.Overflow > 0 {
+				results[i] = s.cfg.hi
+			} else {
+				results[i] = s.cfg.lo
+				for j := len(s.Counts) - 1; j >= 0; j-- {
+					if s.Counts[j] > 0 {
+						results[i] = s.cfg.boundaries[j+1]
+						break
+					}
+				}
+			}
+			continue
+		}
+		if rank <= belowLo {
+			results[i] = s.cfg.lo
+			continue
+		}
+		adjusted := rank - belowLo
+		if adjusted > float64(inRangeCount) {
+			results[i] = s.cfg.hi
+			continue
+		}
+		walk = append(walk, walkEntry{idx: i, rank: adjusted})
+	}
+
+	if len(walk) == 0 {
+		return results
+	}
+
+	// Sort by rank for a single ascending pass through the buckets.
+	sort.Slice(walk, func(i, j int) bool {
+		return walk[i].rank < walk[j].rank
+	})
+
+	n := len(s.Counts)
+
+	// Compute densities once (same as ValueAtQuantile).
+	avgDensity := make([]float64, n)
+	for i := range n {
+		w := s.cfg.boundaries[i+1] - s.cfg.boundaries[i]
+		if w > 0 && s.Counts[i] > 0 {
+			avgDensity[i] = float64(s.Counts[i]) / w
+		}
+	}
+	boundaryDensity := make([]float64, n+1)
+	for i := range n {
+		switch i {
+		case 0:
+			boundaryDensity[i] = avgDensity[0]
+		default:
+			boundaryDensity[i] = (avgDensity[i-1] + avgDensity[i]) / 2.0
+		}
+	}
+
+	// Single-pass bucket walk: process all quantiles whose rank falls
+	// within each bucket before advancing to the next.
+	var cumCount float64
+	wi := 0
+	for i := range n {
+		fc := float64(s.Counts[i])
+		nextCum := cumCount + fc
+		for wi < len(walk) && nextCum >= walk[wi].rank {
+			localRank := walk[wi].rank - cumCount
+			lo := s.cfg.boundaries[i]
+			hi := s.cfg.boundaries[i+1]
+			w := hi - lo
+			if w <= 0 || fc == 0 {
+				results[walk[wi].idx] = lo
+			} else {
+				dL := boundaryDensity[i]
+				dR := boundaryDensity[i+1]
+				results[walk[wi].idx] = trapezoidalSolve(lo, w, fc, dL, dR, localRank)
+			}
+			wi++
+		}
+		cumCount = nextCum
+		if wi >= len(walk) {
+			break
+		}
+	}
+
+	// Any remaining entries (shouldn't happen, but safety).
+	for ; wi < len(walk); wi++ {
+		results[walk[wi].idx] = s.cfg.boundaries[n]
+	}
+
+	return results
+}
+
 // Mean returns the mean of all recorded values. Returns NaN if no
 // observations have been recorded, matching Prometheus histogram behavior.
 func (s *Snapshot) Mean() float64 {