Skip to content

Commit 0d540ae

Browse files
committed
Uses forked pkg/util/interner for dogstatsd workload
1 parent 9b1d0e8 commit 0d540ae

File tree

3 files changed

+75
-77
lines changed

3 files changed

+75
-77
lines changed

comp/dogstatsd/server/intern.go

+53-54
Original file line numberDiff line numberDiff line change
@@ -6,93 +6,92 @@
66
package server
77

88
import (
9+
"runtime"
10+
"unsafe"
11+
912
"github.com/DataDog/datadog-agent/pkg/config/utils"
1013
"github.com/DataDog/datadog-agent/pkg/telemetry"
11-
"github.com/DataDog/datadog-agent/pkg/util/log"
1214
)
1315

1416
var (
1517
// There are multiple instances of the interner, one per worker. Counters are normally fine,
1618
// gauges require special care to make sense. We don't need to clean up when an instance is
1719
// dropped, because it only happens on agent shutdown.
18-
tlmSIResets = telemetry.NewSimpleCounter("dogstatsd", "string_interner_resets",
19-
"Amount of resets of the string interner used in dogstatsd")
20-
tlmSIRSize = telemetry.NewSimpleGauge("dogstatsd", "string_interner_entries",
21-
"Number of entries in the string interner")
22-
tlmSIRBytes = telemetry.NewSimpleGauge("dogstatsd", "string_interner_bytes",
23-
"Number of bytes stored in the string interner")
2420
tlmSIRHits = telemetry.NewSimpleCounter("dogstatsd", "string_interner_hits",
2521
"Number of times string interner returned an existing string")
2622
tlmSIRMiss = telemetry.NewSimpleCounter("dogstatsd", "string_interner_miss",
2723
"Number of times string interner created a new string object")
2824
tlmSIRNew = telemetry.NewSimpleCounter("dogstatsd", "string_interner_new",
2925
"Number of times string interner was created")
30-
tlmSIRStrBytes = telemetry.NewSimpleHistogram("dogstatsd", "string_interner_str_bytes",
31-
"Number of times string with specific length were added",
32-
[]float64{1, 2, 4, 8, 16, 32, 64, 128})
3326
)
3427

35-
// stringInterner is a string cache providing a longer life for strings,
36-
// helping to avoid GC runs because they're re-used many times instead of
37-
// created every time.
28+
// A StringValue pointer is the handle to the underlying string value.
29+
// See Get how Value pointers may be used.
30+
type StringValue struct {
31+
_ [0]func() // prevent people from accidentally using value type as comparable
32+
cmpVal string
33+
resurrected bool
34+
}
35+
36+
// Get the underlying string value
37+
func (v *StringValue) Get() string {
38+
return v.cmpVal
39+
}
40+
41+
// stringInterner interns strings while allowing them to be cleaned up by the GC.
42+
// It can handle both string and []byte types without allocation.
3843
type stringInterner struct {
39-
strings map[string]string
40-
maxSize int
41-
curBytes int
4244
tlmEnabled bool
45+
valMap map[string]uintptr
4346
}
4447

45-
func newStringInterner(maxSize int) *stringInterner {
48+
// newStringInterner creates a new StringInterner
49+
func newStringInterner() *stringInterner {
4650
i := &stringInterner{
47-
strings: make(map[string]string),
48-
maxSize: maxSize,
51+
valMap: make(map[string]uintptr),
4952
tlmEnabled: utils.IsTelemetryEnabled(),
5053
}
54+
5155
if i.tlmEnabled {
5256
tlmSIRNew.Inc()
5357
}
5458
return i
5559
}
5660

57-
// LoadOrStore always returns the string from the cache, adding it into the
58-
// cache if needed.
59-
// If we need to store a new entry and the cache is at its maximum capacity,
60-
// it is reset.
61-
func (i *stringInterner) LoadOrStore(key []byte) string {
62-
// here is the string interner trick: the map lookup using
63-
// string(key) doesn't actually allocate a string, but is
64-
// returning the string value -> no new heap allocation
65-
// for this string.
66-
// See https://github.com/golang/go/commit/f5f5a8b6209f84961687d993b93ea0d397f5d5bf
67-
if s, found := i.strings[string(key)]; found {
68-
if i.tlmEnabled {
61+
// Get returns a pointer representing the []byte k
62+
//
63+
// The returned pointer will be the same for Get(v) and Get(v2)
64+
// if and only if v == v2. The returned pointer will also be the same
65+
// for a string with same contents as the byte slice.
66+
//
67+
//go:nocheckptr
68+
func (s *stringInterner) LoadOrStore(k []byte) *StringValue {
69+
var v *StringValue
70+
// the compiler will optimize the following map lookup to not alloc a string
71+
if addr, ok := s.valMap[string(k)]; ok {
72+
//goland:noinspection GoVetUnsafePointer
73+
v = (*StringValue)((unsafe.Pointer)(addr))
74+
v.resurrected = true
75+
if s.tlmEnabled {
6976
tlmSIRHits.Inc()
7077
}
71-
return s
78+
return v
7279
}
73-
if len(i.strings) >= i.maxSize {
74-
if i.tlmEnabled {
75-
tlmSIResets.Inc()
76-
tlmSIRBytes.Sub(float64(i.curBytes))
77-
tlmSIRSize.Sub(float64(len(i.strings)))
78-
i.curBytes = 0
79-
}
8080

81-
i.strings = make(map[string]string)
82-
log.Debug("clearing the string interner cache")
83-
84-
}
85-
86-
s := string(key)
87-
i.strings[s] = s
81+
v = &StringValue{cmpVal: string(k)}
82+
runtime.SetFinalizer(v, s.finalize)
83+
s.valMap[string(k)] = uintptr(unsafe.Pointer(v))
84+
tlmSIRMiss.Inc()
85+
return v
86+
}
8887

89-
if i.tlmEnabled {
90-
tlmSIRMiss.Inc()
91-
tlmSIRSize.Inc()
92-
tlmSIRBytes.Add(float64(len(s)))
93-
tlmSIRStrBytes.Observe(float64(len(s)))
94-
i.curBytes += len(s)
88+
func (s *stringInterner) finalize(v *StringValue) {
89+
if v.resurrected {
90+
// We lost the race. Somebody resurrected it while we
91+
// were about to finalize it. Try again next round.
92+
v.resurrected = false
93+
runtime.SetFinalizer(v, s.finalize)
94+
return
9595
}
96-
97-
return s
96+
delete(s.valMap, v.Get())
9897
}

comp/dogstatsd/server/intern_test.go

+18-18
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ import (
1313

1414
func TestInternLoadOrStoreValue(t *testing.T) {
1515
assert := assert.New(t)
16-
sInterner := newStringInterner(3)
16+
sInterner := newStringInterner()
1717

1818
foo := []byte("foo")
1919
bar := []byte("bar")
@@ -22,59 +22,59 @@ func TestInternLoadOrStoreValue(t *testing.T) {
2222

2323
// first test that the good value is returned.
2424

25-
v := sInterner.LoadOrStore(foo)
25+
v := sInterner.LoadOrStore(foo).Get()
2626
assert.Equal("foo", v)
27-
v = sInterner.LoadOrStore(bar)
27+
v = sInterner.LoadOrStore(bar).Get()
2828
assert.Equal("bar", v)
29-
v = sInterner.LoadOrStore(far)
29+
v = sInterner.LoadOrStore(far).Get()
3030
assert.Equal("far", v)
31-
v = sInterner.LoadOrStore(boo)
31+
v = sInterner.LoadOrStore(boo).Get()
3232
assert.Equal("boo", v)
3333
}
3434

3535
func TestInternLoadOrStorePointer(t *testing.T) {
3636
assert := assert.New(t)
37-
sInterner := newStringInterner(4)
37+
sInterner := newStringInterner()
3838

3939
foo := []byte("foo")
4040
bar := []byte("bar")
4141
boo := []byte("boo")
4242

4343
// first test that the good value is returned.
4444

45-
v := sInterner.LoadOrStore(foo)
45+
v := sInterner.LoadOrStore(foo).Get()
4646
assert.Equal("foo", v)
47-
v2 := sInterner.LoadOrStore(foo)
47+
v2 := sInterner.LoadOrStore(foo).Get()
4848
assert.Equal(&v, &v2, "must point to the same address")
49-
v2 = sInterner.LoadOrStore(bar)
49+
v2 = sInterner.LoadOrStore(bar).Get()
5050
assert.NotEqual(&v, &v2, "must point to a different address")
51-
v3 := sInterner.LoadOrStore(bar)
51+
v3 := sInterner.LoadOrStore(bar).Get()
5252
assert.Equal(&v2, &v3, "must point to the same address")
5353

54-
v4 := sInterner.LoadOrStore(boo)
54+
v4 := sInterner.LoadOrStore(boo).Get()
5555
assert.NotEqual(&v, &v4, "must point to a different address")
5656
assert.NotEqual(&v2, &v4, "must point to a different address")
5757
assert.NotEqual(&v3, &v4, "must point to a different address")
5858
}
5959

6060
func TestInternLoadOrStoreReset(t *testing.T) {
6161
assert := assert.New(t)
62-
sInterner := newStringInterner(4)
62+
sInterner := newStringInterner()
6363

6464
// first test that the good value is returned.
6565
sInterner.LoadOrStore([]byte("foo"))
66-
assert.Equal(1, len(sInterner.strings))
66+
assert.Equal(1, len(sInterner.valMap))
6767
sInterner.LoadOrStore([]byte("bar"))
6868
sInterner.LoadOrStore([]byte("bar"))
69-
assert.Equal(2, len(sInterner.strings))
69+
assert.Equal(2, len(sInterner.valMap))
7070
sInterner.LoadOrStore([]byte("boo"))
71-
assert.Equal(3, len(sInterner.strings))
71+
assert.Equal(3, len(sInterner.valMap))
7272
sInterner.LoadOrStore([]byte("far"))
7373
sInterner.LoadOrStore([]byte("far"))
7474
sInterner.LoadOrStore([]byte("far"))
75-
assert.Equal(4, len(sInterner.strings))
75+
assert.Equal(4, len(sInterner.valMap))
7676
sInterner.LoadOrStore([]byte("val"))
77-
assert.Equal(1, len(sInterner.strings))
77+
assert.Equal(5, len(sInterner.valMap))
7878
sInterner.LoadOrStore([]byte("val"))
79-
assert.Equal(1, len(sInterner.strings))
79+
assert.Equal(5, len(sInterner.valMap))
8080
}

comp/dogstatsd/server/parse.go

+4-5
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,10 @@ type parser struct {
5151
}
5252

5353
func newParser(cfg config.Reader, float64List *float64ListPool) *parser {
54-
stringInternerCacheSize := cfg.GetInt("dogstatsd_string_interner_size")
5554
readTimestamps := cfg.GetBool("dogstatsd_no_aggregation_pipeline")
5655

5756
return &parser{
58-
interner: newStringInterner(stringInternerCacheSize),
57+
interner: newStringInterner(),
5958
readTimestamps: readTimestamps,
6059
float64List: float64List,
6160
dsdOriginEnabled: cfg.GetBool("dogstatsd_origin_detection_client"),
@@ -97,11 +96,11 @@ func (p *parser) parseTags(rawTags []byte) []string {
9796
if tagPos < 0 {
9897
break
9998
}
100-
tagsList[i] = p.interner.LoadOrStore(rawTags[:tagPos])
99+
tagsList[i] = p.interner.LoadOrStore(rawTags[:tagPos]).Get()
101100
rawTags = rawTags[tagPos+len(commaSeparator):]
102101
i++
103102
}
104-
tagsList[i] = p.interner.LoadOrStore(rawTags)
103+
tagsList[i] = p.interner.LoadOrStore(rawTags).Get()
105104
return tagsList
106105
}
107106

@@ -188,7 +187,7 @@ func (p *parser) parseMetricSample(message []byte) (dogstatsdMetricSample, error
188187
}
189188

190189
return dogstatsdMetricSample{
191-
name: p.interner.LoadOrStore(name),
190+
name: p.interner.LoadOrStore(name).Get(),
192191
value: value,
193192
values: values,
194193
setValue: string(setValue),

0 commit comments

Comments
 (0)