Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 5088a2c

Browse files
committed
sort symbols in order of frequency rather than lexicographically
Signed-off-by: Callum Styan <[email protected]>
1 parent b4c7c80 commit 5088a2c

7 files changed

+62
-45
lines changed

block.go

+3-3
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ import (
3535
type IndexWriter interface {
3636
// AddSymbols registers all string symbols that are encountered in series
3737
// and other indices.
38-
AddSymbols(sym map[string]struct{}) error
38+
AddSymbols(sym map[string]int) error
3939

4040
// AddSeries populates the index writer with a series and its offsets
4141
// of chunks that the index can reference.
@@ -62,7 +62,7 @@ type IndexWriter interface {
6262
type IndexReader interface {
6363
// Symbols returns the string symbols that may occur in series' labels
6464
// and indices, each mapped to its frequency (some implementations report 0).
65-
Symbols() (map[string]struct{}, error)
65+
Symbols() (map[string]int, error)
6666

6767
// LabelValues returns the possible label values.
6868
LabelValues(names ...string) (index.StringTuples, error)
@@ -375,7 +375,7 @@ type blockIndexReader struct {
375375
b *Block
376376
}
377377

378-
func (r blockIndexReader) Symbols() (map[string]struct{}, error) {
378+
func (r blockIndexReader) Symbols() (map[string]int, error) {
379379
s, err := r.ir.Symbols()
380380
return s, errors.Wrapf(err, "block: %s", r.b.Meta().ULID)
381381
}

compact.go

+2-2
Original file line numberDiff line numberDiff line change
@@ -526,7 +526,7 @@ func (c *LeveledCompactor) write(dest string, meta *BlockMeta, blocks ...BlockRe
526526
func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta, indexw IndexWriter, chunkw ChunkWriter) error {
527527
var (
528528
set ChunkSeriesSet
529-
allSymbols = make(map[string]struct{}, 1<<16)
529+
allSymbols = make(map[string]int, 1<<16)
530530
closers = []io.Closer{}
531531
)
532532
defer func() { closeAll(closers...) }()
@@ -555,7 +555,7 @@ func (c *LeveledCompactor) populateBlock(blocks []BlockReader, meta *BlockMeta,
555555
return errors.Wrap(err, "read symbols")
556556
}
557557
for s := range symbols {
558-
allSymbols[s] = struct{}{}
558+
allSymbols[s] = symbols[s]
559559
}
560560

561561
all, err := indexr.Postings(index.AllPostingsKey())

head.go

+15-9
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ type Head struct {
6767
series *stripeSeries
6868

6969
symMtx sync.RWMutex
70-
symbols map[string]struct{}
70+
symbols map[string]int
7171
values map[string]stringset // label names to possible values
7272

7373
postings *index.MemPostings // postings lists for terms
@@ -187,7 +187,7 @@ func NewHead(r prometheus.Registerer, l log.Logger, wal *wal.WAL, chunkRange int
187187
maxTime: math.MinInt64,
188188
series: newStripeSeries(),
189189
values: map[string]stringset{},
190-
symbols: map[string]struct{}{},
190+
symbols: make(map[string]int),
191191
postings: index.NewUnorderedMemPostings(),
192192
tombstones: NewMemTombstones(),
193193
}
@@ -790,12 +790,18 @@ func (h *Head) gc() {
790790
h.postings.Delete(deleted)
791791

792792
// Rebuild symbols and label value indices from what is left in the postings terms.
793-
symbols := make(map[string]struct{})
793+
symbols := make(map[string]int)
794794
values := make(map[string]stringset, len(h.values))
795795

796+
<<<<<<< HEAD
796797
if err := h.postings.Iter(func(t labels.Label, _ index.Postings) error {
797798
symbols[t.Name] = struct{}{}
798799
symbols[t.Value] = struct{}{}
800+
=======
801+
h.postings.Iter(func(t labels.Label, _ index.Postings) error {
802+
symbols[t.Name]++
803+
symbols[t.Value]++
804+
>>>>>>> sort symbols in order of frequency rather than lexicographically
799805

800806
ss, ok := values[t.Name]
801807
if !ok {
@@ -939,14 +945,14 @@ func (h *headIndexReader) Close() error {
939945
return nil
940946
}
941947

942-
func (h *headIndexReader) Symbols() (map[string]struct{}, error) {
948+
func (h *headIndexReader) Symbols() (map[string]int, error) {
943949
h.head.symMtx.RLock()
944950
defer h.head.symMtx.RUnlock()
945951

946-
res := make(map[string]struct{}, len(h.head.symbols))
952+
res := make(map[string]int, len(h.head.symbols))
947953

948-
for s := range h.head.symbols {
949-
res[s] = struct{}{}
954+
for s, num := range h.head.symbols {
955+
res[s] = num
950956
}
951957
return res, nil
952958
}
@@ -1078,8 +1084,8 @@ func (h *Head) getOrCreateWithID(id, hash uint64, lset labels.Labels) (*memSerie
10781084
}
10791085
valset.set(l.Value)
10801086

1081-
h.symbols[l.Name] = struct{}{}
1082-
h.symbols[l.Value] = struct{}{}
1087+
h.symbols[l.Name]++
1088+
h.symbols[l.Value]++
10831089
}
10841090

10851091
return s, true

head_test.go

+6-6
Original file line numberDiff line numberDiff line change
@@ -209,12 +209,12 @@ func TestHead_Truncate(t *testing.T) {
209209
testutil.Assert(t, postingsB2 == nil, "")
210210
testutil.Assert(t, postingsC1 == nil, "")
211211

212-
testutil.Equals(t, map[string]struct{}{
213-
"": {}, // from 'all' postings list
214-
"a": {},
215-
"b": {},
216-
"1": {},
217-
"2": {},
212+
testutil.Equals(t, map[string]int{
213+
"": 2, // from 'all' postings list
214+
"a": 2,
215+
"b": 1,
216+
"1": 2,
217+
"2": 1,
218218
}, h.symbols)
219219

220220
testutil.Equals(t, map[string]stringset{

index/index.go

+21-10
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,17 @@ func (s indexWriterSeriesSlice) Less(i, j int) bool {
5555
return labels.Compare(s[i].labels, s[j].labels) < 0
5656
}
5757

58+
type symbolFrequencyPair struct {
59+
symbol string
60+
frequency int
61+
}
62+
63+
type symbolFrequencylist []symbolFrequencyPair
64+
65+
func (s symbolFrequencylist) Len() int { return len(s) }
66+
func (s symbolFrequencylist) Swap(i, j int) { s[i], s[j] = s[j], s[i] }
67+
func (s symbolFrequencylist) Less(i, j int) bool { return s[i].frequency < s[j].frequency }
68+
5869
type indexWriterStage uint8
5970

6071
const (
@@ -332,17 +343,17 @@ func (w *Writer) AddSeries(ref uint64, lset labels.Labels, chunks ...chunks.Meta
332343
return nil
333344
}
334345

335-
func (w *Writer) AddSymbols(sym map[string]struct{}) error {
346+
func (w *Writer) AddSymbols(sym map[string]int) error {
336347
if err := w.ensureStage(idxStageSymbols); err != nil {
337348
return err
338349
}
339350
// Generate the list of symbols, ordered by descending frequency, that we will store as the reference table.
340-
symbols := make([]string, 0, len(sym))
351+
symbols := make(symbolFrequencylist, 0, len(sym))
341352

342-
for s := range sym {
343-
symbols = append(symbols, s)
353+
for k, v := range sym {
354+
symbols = append(symbols, symbolFrequencyPair{k, v})
344355
}
345-
sort.Strings(symbols)
356+
sort.Sort(sort.Reverse(symbols))
346357

347358
const headerSize = 4
348359

@@ -354,8 +365,8 @@ func (w *Writer) AddSymbols(sym map[string]struct{}) error {
354365
w.symbols = make(map[string]uint32, len(symbols))
355366

356367
for index, s := range symbols {
357-
w.symbols[s] = uint32(index)
358-
w.buf2.putUvarintStr(s)
368+
w.symbols[s.symbol] = uint32(index)
369+
w.buf2.putUvarintStr(s.symbol)
359370
}
360371

361372
w.buf1.putBE32int(w.buf2.len())
@@ -834,11 +845,11 @@ func (r *Reader) lookupSymbol(o uint32) (string, error) {
834845
}
835846

836847
// Symbols returns the symbols that exist within the index; every symbol's frequency is reported as 0.
837-
func (r *Reader) Symbols() (map[string]struct{}, error) {
838-
res := make(map[string]struct{}, len(r.symbols))
848+
func (r *Reader) Symbols() (map[string]int, error) {
849+
res := make(map[string]int, len(r.symbols))
839850

840851
for _, s := range r.symbols {
841-
res[s] = struct{}{}
852+
res[s] = 0
842853
}
843854
return res, nil
844855
}

index/index_test.go

+10-10
Original file line numberDiff line numberDiff line change
@@ -191,13 +191,13 @@ func TestIndexRW_Postings(t *testing.T) {
191191
labels.FromStrings("a", "1", "b", "4"),
192192
}
193193

194-
err = iw.AddSymbols(map[string]struct{}{
195-
"a": {},
196-
"b": {},
197-
"1": {},
198-
"2": {},
199-
"3": {},
200-
"4": {},
194+
err = iw.AddSymbols(map[string]int{
195+
"a": 1,
196+
"b": 2,
197+
"1": 1,
198+
"2": 4,
199+
"3": 5,
200+
"4": 3,
201201
})
202202
testutil.Ok(t, err)
203203

@@ -245,11 +245,11 @@ func TestPersistence_index_e2e(t *testing.T) {
245245
// Sort labels as the index writer expects series in sorted order.
246246
sort.Sort(labels.Slice(lbls))
247247

248-
symbols := map[string]struct{}{}
248+
symbols := make(map[string]int)
249249
for _, lset := range lbls {
250250
for _, l := range lset {
251-
symbols[l.Name] = struct{}{}
252-
symbols[l.Value] = struct{}{}
251+
symbols[l.Name] = 0
252+
symbols[l.Value] = 0
253253
}
254254
}
255255

querier_test.go

+5-5
Original file line numberDiff line numberDiff line change
@@ -1390,20 +1390,20 @@ type mockIndex struct {
13901390
series map[uint64]series
13911391
labelIndex map[string][]string
13921392
postings map[labels.Label][]uint64
1393-
symbols map[string]struct{}
1393+
symbols map[string]int
13941394
}
13951395

13961396
func newMockIndex() mockIndex {
13971397
ix := mockIndex{
13981398
series: make(map[uint64]series),
13991399
labelIndex: make(map[string][]string),
14001400
postings: make(map[labels.Label][]uint64),
1401-
symbols: make(map[string]struct{}),
1401+
symbols: make(map[string]int),
14021402
}
14031403
return ix
14041404
}
14051405

1406-
func (m mockIndex) Symbols() (map[string]struct{}, error) {
1406+
func (m mockIndex) Symbols() (map[string]int, error) {
14071407
return m.symbols, nil
14081408
}
14091409

@@ -1412,8 +1412,8 @@ func (m mockIndex) AddSeries(ref uint64, l labels.Labels, chunks ...chunks.Meta)
14121412
return errors.Errorf("series with reference %d already added", ref)
14131413
}
14141414
for _, lbl := range l {
1415-
m.symbols[lbl.Name] = struct{}{}
1416-
m.symbols[lbl.Value] = struct{}{}
1415+
m.symbols[lbl.Name] = 0
1416+
m.symbols[lbl.Value] = 0
14171417
}
14181418

14191419
s := series{l: l}

0 commit comments

Comments
 (0)