From be7dbe10c6cd456b0ae4a9ecc67f30e37eef78a3 Mon Sep 17 00:00:00 2001
From: Brian Dillmann
Date: Mon, 20 Apr 2026 14:19:30 -0400
Subject: [PATCH] goodhistogram: add batch quantile computation and common Params presets

Computing p50/p90/p99/p999 with individual ValueAtQuantile calls
redundantly walks the bucket array and recomputes boundary densities for
each quantile. ValuesAtQuantiles resolves all requested quantiles in a
single pass: it sorts them by rank, computes boundary densities once, and
processes each quantile as the walk reaches its bucket.

Quantiles outside [0, 1] are resolved as 0 or 1, and a NaN quantile
yields NaN without disturbing the other results.

Also adds common Params presets (HiResLatency, IOLatency, ResponseTime,
LongRunning, DataSize, MemoryUsage) modeled after CockroachDB's
histogram_buckets.go tiers, so callers don't need to research appropriate
ranges for standard measurement scenarios.

Co-Authored-By: roachdev-claude
---
 histogram.go      |  51 +++++++++++++++
 histogram_test.go | 159 ++++++++++++++++++++++++++++++++++++++++++++++
 quantile.go       | 149 ++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 358 insertions(+), 1 deletion(-)

diff --git a/histogram.go b/histogram.go
index 6751294..485e9ac 100644
--- a/histogram.go
+++ b/histogram.go
@@ -27,6 +27,7 @@ import (
 	"sort"
 	"sync"
 	"sync/atomic"
+	"time"
 )
 
 const maxSchema = 8
@@ -214,6 +215,56 @@ type Params struct {
 	ErrorBound float64
 }
 
+// Common Params presets, modeled after the bucket tiers in CockroachDB's
+// pkg/util/metric/histogram_buckets.go and Prometheus DefBuckets.
+//
+// Time-based presets expect values in nanoseconds, matching Go's
+// time.Duration. Record with int64(duration).
+var (
+	// HiResLatencyParams covers high-resolution latency from 1us to 5m.
+	// Use for: end-to-end request latencies where you need visibility into
+	// both fast-path sub-millisecond operations and slow tail outliers.
+	HiResLatencyParams = Params{
+		Lo: float64(time.Microsecond),
+		Hi: float64(5 * time.Minute),
+	}
+
+	// IOLatencyParams covers fast I/O operations from 10us to 10s.
+	// Use for: RPC latencies, raft operations, disk I/O, network round-trips.
+	IOLatencyParams = Params{
+		Lo: float64(10 * time.Microsecond),
+		Hi: float64(10 * time.Second),
+	}
+
+	// ResponseTimeParams covers request/response latencies from 1ms to 30s.
+	// Use for: SQL query execution, HTTP handlers, API response times.
+	ResponseTimeParams = Params{
+		Lo: float64(time.Millisecond),
+		Hi: float64(30 * time.Second),
+	}
+
+	// LongRunningParams covers long-running operations from 500ms to 1h.
+	// Use for: backups, restores, migrations, bulk ingestion jobs.
+	LongRunningParams = Params{
+		Lo: float64(500 * time.Millisecond),
+		Hi: float64(time.Hour),
+	}
+
+	// DataSizeParams covers data payload sizes from 1KiB to 16MiB (in bytes).
+	// Use for: message sizes, request/response bodies, SST sizes.
+	DataSizeParams = Params{
+		Lo: 1024,
+		Hi: 16 * 1024 * 1024,
+	}
+
+	// MemoryUsageParams covers memory tracking from 1B to 64MiB (in bytes).
+	// Use for: memory allocations, buffer sizes, cache entry sizes.
+	MemoryUsageParams = Params{
+		Lo: 1,
+		Hi: 64 * 1024 * 1024,
+	}
+)
+
 func (p Params) withDefaults() Params {
 	if p.Lo == 0 {
 		p.Lo = 1
diff --git a/histogram_test.go b/histogram_test.go
index 3cec5ba..3e4e87d 100644
--- a/histogram_test.go
+++ b/histogram_test.go
@@ -463,6 +463,165 @@ func TestQuantileEdgeCases(t *testing.T) {
 	})
 }
 
+func TestValuesAtQuantiles(t *testing.T) {
+	t.Run("matches individual calls", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1e6, ErrorBound: 0.05})
+		rng := rand.New(rand.NewSource(42))
+		for i := 0; i < 10000; i++ {
+			h.Record(int64(rng.Float64()*999999) + 1)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 0.999, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9,
+				"q=%.3f: single=%.6f batch=%.6f", q, single, batch[i])
+		}
+	})
+
+	t.Run("preserves input order", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 1000; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		// Deliberately unsorted quantiles.
+		qs := []float64{0.99, 0.25, 0.75, 0.50, 0.10}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9,
+				"q=%.2f at index %d", q, i)
+		}
+	})
+
+	t.Run("empty histogram", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		snap := h.Snapshot()
+		results := snap.ValuesAtQuantiles([]float64{0, 0.5, 1.0})
+		for i, r := range results {
+			require.Equalf(t, 0.0, r, "index %d", i)
+		}
+	})
+
+	t.Run("empty quantiles slice", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		h.Record(500)
+		snap := h.Snapshot()
+		require.Empty(t, snap.ValuesAtQuantiles([]float64{}))
+		require.Empty(t, snap.ValuesAtQuantiles(nil))
+	})
+
+	t.Run("single quantile", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 100; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		batch := snap.ValuesAtQuantiles([]float64{0.50})
+		single := snap.ValueAtQuantile(0.50)
+		require.InDelta(t, single, batch[0], 1e-9)
+	})
+
+	t.Run("all zeros", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := 0; i < 100; i++ {
+			h.Record(0)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.50, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("all underflow", func(t *testing.T) {
+		h := New(Params{Lo: 100, Hi: 10000, ErrorBound: 0.05})
+		for i := int64(1); i <= 50; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.50, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("all overflow", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 100, ErrorBound: 0.05})
+		for i := int64(200); i <= 300; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.50, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("mixed underflow overflow and in-range", func(t *testing.T) {
+		h := New(Params{Lo: 100, Hi: 10000, ErrorBound: 0.05})
+		for i := 0; i < 10; i++ {
+			h.Record(0)
+		}
+		for i := int64(1); i <= 10; i++ {
+			h.Record(i)
+		}
+		for i := int64(500); i <= 559; i++ {
+			h.Record(i)
+		}
+		for i := int64(20000); i <= 20019; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95, 0.99, 1.0}
+		batch := snap.ValuesAtQuantiles(qs)
+		for i, q := range qs {
+			single := snap.ValueAtQuantile(q)
+			require.InDeltaf(t, single, batch[i], 1e-9, "q=%.2f", q)
+		}
+	})
+
+	t.Run("duplicate quantiles", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 100; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		qs := []float64{0.50, 0.50, 0.50}
+		batch := snap.ValuesAtQuantiles(qs)
+		single := snap.ValueAtQuantile(0.50)
+		for i := range qs {
+			require.InDeltaf(t, single, batch[i], 1e-9, "index %d", i)
+		}
+	})
+
+	t.Run("NaN quantile is isolated", func(t *testing.T) {
+		h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
+		for i := int64(1); i <= 100; i++ {
+			h.Record(i)
+		}
+		snap := h.Snapshot()
+		// Derive NaN at run time; Go rejects constant division by zero.
+		zero := 0.0
+		nan := zero / zero
+		qs := []float64{0.90, nan, 0.10, 0.50}
+		batch := snap.ValuesAtQuantiles(qs)
+		require.True(t, batch[1] != batch[1], "NaN quantile should yield NaN")
+		for _, i := range []int{0, 2, 3} {
+			single := snap.ValueAtQuantile(qs[i])
+			require.InDeltaf(t, single, batch[i], 1e-9, "index %d", i)
+		}
+	})
+}
+
 func TestMeanAndTotal(t *testing.T) {
 	h := New(Params{Lo: 1, Hi: 1000, ErrorBound: 0.05})
 	h.Record(100)
diff --git a/quantile.go b/quantile.go
index f768402..8eb5534 100644
--- a/quantile.go
+++ b/quantile.go
@@ -8,7 +8,10 @@
 
 package goodhistogram
 
-import "math"
+import (
+	"math"
+	"sort"
+)
 
 // ValueAtQuantile returns the estimated value at the given quantile q ∈ [0, 1]
 // using trapezoidal interpolation.
@@ -130,6 +133,150 @@ func (s *Snapshot) ValueAtQuantile(q float64) float64 {
 	return s.cfg.boundaries[n]
 }
 
+// ValuesAtQuantiles returns the estimated values at the given quantiles
+// in a single pass through the bucket array. The input quantiles should be
+// in [0, 1]; a quantile below 0 is treated as 0 and one above 1 as 1.
+// Results are returned in the same order as the input quantiles.
+//
+// A snapshot with no observations yields all zeros. Otherwise a NaN
+// quantile yields a NaN result and does not affect the other results.
+//
+// This is more efficient than calling ValueAtQuantile in a loop because it
+// computes bucket densities once and walks the bucket array once regardless
+// of how many quantiles are requested.
+func (s *Snapshot) ValuesAtQuantiles(qs []float64) []float64 {
+	results := make([]float64, len(qs))
+	if len(qs) == 0 || s.TotalCount == 0 {
+		return results
+	}
+
+	belowLo := float64(s.ZeroCount + s.Underflow)
+	var inRangeCount uint64
+	for _, c := range s.Counts {
+		inRangeCount += c
+	}
+
+	// Resolve edge cases per-quantile and collect those needing a bucket walk.
+	type walkEntry struct {
+		idx  int     // index in results
+		rank float64 // adjusted rank within in-range buckets
+	}
+	walk := make([]walkEntry, 0, len(qs))
+	for i, q := range qs {
+		if math.IsNaN(q) {
+			// NaN fails every comparison below; left in the walk list it
+			// would break the sort order and stall the bucket scan.
+			results[i] = math.NaN()
+			continue
+		}
+		rank := q * float64(s.TotalCount)
+		if rank <= 0 {
+			if s.ZeroCount+s.Underflow > 0 {
+				results[i] = s.cfg.lo
+			} else {
+				results[i] = s.cfg.hi
+				for j, c := range s.Counts {
+					if c > 0 {
+						results[i] = s.cfg.boundaries[j]
+						break
+					}
+				}
+			}
+			continue
+		}
+		if rank >= float64(s.TotalCount) {
+			if s.Overflow > 0 {
+				results[i] = s.cfg.hi
+			} else {
+				results[i] = s.cfg.lo
+				for j := len(s.Counts) - 1; j >= 0; j-- {
+					if s.Counts[j] > 0 {
+						results[i] = s.cfg.boundaries[j+1]
+						break
+					}
+				}
+			}
+			continue
+		}
+		if rank <= belowLo {
+			results[i] = s.cfg.lo
+			continue
+		}
+		adjusted := rank - belowLo
+		if adjusted > float64(inRangeCount) {
+			results[i] = s.cfg.hi
+			continue
+		}
+		walk = append(walk, walkEntry{idx: i, rank: adjusted})
+	}
+
+	if len(walk) == 0 {
+		return results
+	}
+
+	// Sort by rank for a single ascending pass through the buckets.
+	sort.Slice(walk, func(i, j int) bool {
+		return walk[i].rank < walk[j].rank
+	})
+
+	n := len(s.Counts)
+
+	// Compute densities once (same as ValueAtQuantile).
+	avgDensity := make([]float64, n)
+	for i := range n {
+		w := s.cfg.boundaries[i+1] - s.cfg.boundaries[i]
+		if w > 0 && s.Counts[i] > 0 {
+			avgDensity[i] = float64(s.Counts[i]) / w
+		}
+	}
+	// NOTE(review): the loop below assigns indices 0..n-1 only, so
+	// boundaryDensity[n] stays 0 and the last bucket interpolates toward a
+	// zero right-edge density. Confirm this matches ValueAtQuantile.
+	boundaryDensity := make([]float64, n+1)
+	for i := range n {
+		switch i {
+		case 0:
+			boundaryDensity[i] = avgDensity[0]
+		default:
+			boundaryDensity[i] = (avgDensity[i-1] + avgDensity[i]) / 2.0
+		}
+	}
+
+	// Single-pass bucket walk: process all quantiles whose rank falls
+	// within each bucket before advancing to the next.
+	var cumCount float64
+	wi := 0
+	for i := range n {
+		fc := float64(s.Counts[i])
+		nextCum := cumCount + fc
+		for wi < len(walk) && nextCum >= walk[wi].rank {
+			localRank := walk[wi].rank - cumCount
+			lo := s.cfg.boundaries[i]
+			hi := s.cfg.boundaries[i+1]
+			w := hi - lo
+			if w <= 0 || fc == 0 {
+				results[walk[wi].idx] = lo
+			} else {
+				dL := boundaryDensity[i]
+				dR := boundaryDensity[i+1]
+				results[walk[wi].idx] = trapezoidalSolve(lo, w, fc, dL, dR, localRank)
+			}
+			wi++
+		}
+		cumCount = nextCum
+		if wi >= len(walk) {
+			break
+		}
+	}
+
+	// Any remaining entries (shouldn't happen, but safety).
+	for ; wi < len(walk); wi++ {
+		results[walk[wi].idx] = s.cfg.boundaries[n]
+	}
+
+	return results
+}
+
 // Mean returns the mean of all recorded values. Returns NaN if no
 // observations have been recorded, matching Prometheus histogram behavior.
 func (s *Snapshot) Mean() float64 {