Skip to content
This repository was archived by the owner on Aug 13, 2019. It is now read-only.

Commit 7cfcf3d

Browse files
committed
add bitmapPostings
Signed-off-by: naivewong <[email protected]>
1 parent bf6c0ae commit 7cfcf3d

File tree

5 files changed

+284
-45
lines changed

5 files changed

+284
-45
lines changed

encoding/encoding.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ type Encbuf struct {
3434
Count uint8
3535
}
3636

37-
func (e *Encbuf) Reset() {
38-
e.B = e.B[:0]
37+
func (e *Encbuf) Reset() {
38+
e.B = e.B[:0]
3939
e.Count = 0
4040
}
4141

index/index.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -532,7 +532,7 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
532532
// The base.
533533
w.buf2.PutUvarint32(refs[0])
534534
// The width.
535-
width := bits.Len32(uint32(refs[len(refs)-1]-refs[0]))
535+
width := bits.Len32(uint32(refs[len(refs)-1] - refs[0]))
536536
w.buf2.PutByte(byte(width))
537537
for _, r := range refs {
538538
w.buf2.PutBits(uint64(r-refs[0]), width)
@@ -541,6 +541,8 @@ func (w *Writer) WritePostings(name, value string, it Postings) error {
541541
writeDeltaBlockPostings(&w.buf2, refs)
542542
case 4:
543543
writeBaseDeltaBlockPostings(&w.buf2, refs)
544+
case 5:
545+
writeBitmapPostings(&w.buf2, refs)
544546
}
545547

546548
w.uint32s = refs
@@ -1061,6 +1063,9 @@ func (dec *Decoder) Postings(b []byte) (int, Postings, error) {
10611063
case 4:
10621064
l := d.Get()
10631065
return n, newBaseDeltaBlockPostings(l, n), d.Err()
1066+
case 5:
1067+
l := d.Get()
1068+
return n, newBitmapPostings(l), d.Err()
10641069
default:
10651070
return n, EmptyPostings(), d.Err()
10661071
}

index/index_test.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"github.com/prometheus/tsdb/chunkenc"
2626
"github.com/prometheus/tsdb/chunks"
2727
"github.com/prometheus/tsdb/encoding"
28+
"github.com/prometheus/tsdb/fileutil"
2829
"github.com/prometheus/tsdb/labels"
2930
"github.com/prometheus/tsdb/testutil"
3031
)
@@ -338,6 +339,12 @@ func TestPersistence_index_e2e(t *testing.T) {
338339
err = iw.Close()
339340
testutil.Ok(t, err)
340341

342+
f, err := fileutil.OpenMmapFile(filepath.Join(dir, indexFilename))
343+
testutil.Ok(t, err)
344+
toc, err := NewTOCFromByteSlice(realByteSlice(f.Bytes()))
345+
testutil.Ok(t, err)
346+
t.Log("size of postings =", toc.LabelIndicesTable-toc.Postings)
347+
341348
ir, err := NewFileReader(filepath.Join(dir, indexFilename))
342349
testutil.Ok(t, err)
343350

index/postings.go

Lines changed: 153 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -692,8 +692,8 @@ func (it *bigEndianPostings) Err() error {
692692
return nil
693693
}
694694

695-
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock.
696-
const postingsType = 4
695+
// 1 is bigEndian, 2 is baseDelta, 3 is deltaBlock, 4 is baseDeltaBlock, 5 is bitmapPostings.
696+
const postingsType = 5
697697

698698
type bitSlice struct {
699699
bstream []byte
@@ -731,8 +731,8 @@ func (bs *bitSlice) readBits(offset int) uint64 {
731731
return u
732732
}
733733

734-
if nbits > int(8 - count) {
735-
u = (u << uint(8 - count)) | uint64((bs.bstream[idx]<<count)>>count)
734+
if nbits > int(8-count) {
735+
u = (u << uint(8-count)) | uint64((bs.bstream[idx]<<count)>>count)
736736
nbits -= int(8 - count)
737737
idx += 1
738738

@@ -827,10 +827,10 @@ func (it *deltaBlockPostings) At() uint64 {
827827
}
828828

829829
func (it *deltaBlockPostings) Next() bool {
830-
if it.offset >= len(it.bs.bstream) << 3 || it.idx >= it.size {
830+
if it.offset >= len(it.bs.bstream)<<3 || it.idx >= it.size {
831831
return false
832832
}
833-
if it.offset % (deltaBlockSize << 3) == 0 {
833+
if it.offset%(deltaBlockSize<<3) == 0 {
834834
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
835835
if n < 1 {
836836
return false
@@ -854,13 +854,13 @@ func (it *deltaBlockPostings) Next() bool {
854854
it.idxBlock = 1
855855
return true
856856
}
857-
857+
858858
it.cur = it.bs.readBits(it.offset) + it.cur
859859
it.offset += it.bs.width
860860
it.idx += 1
861861
it.idxBlock += 1
862862
if it.idxBlock == it.count {
863-
it.offset = ((it.offset-1) / (deltaBlockSize << 3) + 1) * deltaBlockSize << 3
863+
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
864864
}
865865
return true
866866
}
@@ -871,18 +871,18 @@ func (it *deltaBlockPostings) Seek(x uint64) bool {
871871
}
872872

873873
startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
874-
num := (len(it.bs.bstream) - 1) / deltaBlockSize - (it.offset - 1) / (deltaBlockSize << 3) + 1
874+
num := (len(it.bs.bstream)-1)/deltaBlockSize - (it.offset-1)/(deltaBlockSize<<3) + 1
875875
// Do binary search between current position and end.
876876
i := sort.Search(num, func(i int) bool {
877877
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
878878
return val > x
879879
})
880880
if i > 0 {
881-
// Go to the previous block because the previous block
881+
// Go to the previous block because the previous block
882882
// may contain the first value >= x.
883883
i -= 1
884884
}
885-
it.offset = (startOff + i * deltaBlockSize) << 3
885+
it.offset = (startOff + i*deltaBlockSize) << 3
886886
for it.Next() {
887887
if it.At() >= x {
888888
return true
@@ -903,9 +903,9 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
903903
var preVal uint32
904904
var max int
905905
for i < len(arr) {
906-
e.PutUvarint32(arr[i]) // Put base.
906+
e.PutUvarint32(arr[i]) // Put base.
907907
e.PutUvarint64(uint64(i)) // Put idx.
908-
remaining = (deltaBlockSize - (len(e.B) - startLen) % deltaBlockSize - 1) << 3
908+
remaining = (deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1) << 3
909909
deltas = deltas[:0]
910910
preVal = arr[i]
911911
max = -1
@@ -916,7 +916,7 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
916916
if cur <= max {
917917
cur = max
918918
}
919-
if remaining - cur * (len(deltas) + 1) - (((bits.Len(uint(len(deltas))) >> 3) + 1) << 3) >= 0 {
919+
if remaining-cur*(len(deltas)+1)-(((bits.Len(uint(len(deltas)))>>3)+1)<<3) >= 0 {
920920
deltas = append(deltas, delta)
921921
max = cur
922922
preVal = arr[i]
@@ -946,7 +946,7 @@ func writeDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
946946
e.PutBits(uint64(0), remaining)
947947
}
948948
e.Count = 0
949-
949+
950950
// There can be one more extra 0.
951951
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
952952
}
@@ -983,10 +983,10 @@ func (it *baseDeltaBlockPostings) At() uint64 {
983983
}
984984

985985
func (it *baseDeltaBlockPostings) Next() bool {
986-
if it.offset >= len(it.bs.bstream) << 3 || it.idx >= it.size {
986+
if it.offset >= len(it.bs.bstream)<<3 || it.idx >= it.size {
987987
return false
988988
}
989-
if it.offset % (deltaBlockSize << 3) == 0 {
989+
if it.offset%(deltaBlockSize<<3) == 0 {
990990
val, n := binary.Uvarint(it.bs.bstream[it.offset>>3:])
991991
if n < 1 {
992992
return false
@@ -1011,13 +1011,13 @@ func (it *baseDeltaBlockPostings) Next() bool {
10111011
it.idxBlock = 1
10121012
return true
10131013
}
1014-
1014+
10151015
it.cur = it.bs.readBits(it.offset) + it.base
10161016
it.offset += it.bs.width
10171017
it.idx += 1
10181018
it.idxBlock += 1
10191019
if it.idxBlock == it.count {
1020-
it.offset = ((it.offset-1) / (deltaBlockSize << 3) + 1) * deltaBlockSize << 3
1020+
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
10211021
}
10221022
return true
10231023
}
@@ -1028,40 +1028,40 @@ func (it *baseDeltaBlockPostings) Seek(x uint64) bool {
10281028
}
10291029

10301030
startOff := (it.offset - 1) / (deltaBlockSize << 3) * deltaBlockSize
1031-
num := (len(it.bs.bstream) - 1) / deltaBlockSize - (it.offset - 1) / (deltaBlockSize << 3) + 1
1031+
num := (len(it.bs.bstream)-1)/deltaBlockSize - (it.offset-1)/(deltaBlockSize<<3) + 1
10321032
// Do binary search between current position and end.
10331033
i := sort.Search(num, func(i int) bool {
10341034
val, _ := binary.Uvarint(it.bs.bstream[startOff+i*deltaBlockSize:])
10351035
return val > x
10361036
})
10371037
if i > 0 {
1038-
// Go to the previous block because the previous block
1038+
// Go to the previous block because the previous block
10391039
// may contain the first value >= x.
10401040
i -= 1
10411041
}
1042-
it.offset = (startOff + i * deltaBlockSize) << 3
1043-
1042+
it.offset = (startOff + i*deltaBlockSize) << 3
1043+
10441044
// Read base, idx, and width.
10451045
it.Next()
10461046
if x <= it.base {
10471047
return true
10481048
} else {
10491049
temp := x - it.base
1050-
j := sort.Search(it.count - it.idxBlock, func(i int) bool {
1051-
return it.bs.readBits(it.offset + i * it.bs.width) >= temp
1050+
j := sort.Search(it.count-it.idxBlock, func(i int) bool {
1051+
return it.bs.readBits(it.offset+i*it.bs.width) >= temp
10521052
})
10531053

1054-
if j < it.count - it.idxBlock {
1054+
if j < it.count-it.idxBlock {
10551055
it.offset += j * it.bs.width
10561056
it.cur = it.bs.readBits(it.offset) + it.base
10571057
it.offset += it.bs.width
10581058
it.idxBlock += j + 1
10591059
it.idx += j + 1
10601060
if it.idxBlock == it.count {
1061-
it.offset = ((it.offset-1) / (deltaBlockSize << 3) + 1) * deltaBlockSize << 3
1061+
it.offset = ((it.offset-1)/(deltaBlockSize<<3) + 1) * deltaBlockSize << 3
10621062
}
10631063
} else {
1064-
it.offset = (startOff + (i + 1) * deltaBlockSize) << 3
1064+
it.offset = (startOff + (i+1)*deltaBlockSize) << 3
10651065
return it.Next()
10661066
}
10671067
return true
@@ -1080,17 +1080,17 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
10801080
var base uint32
10811081
var max int
10821082
for i < len(arr) {
1083-
e.PutUvarint32(arr[i]) // Put base.
1083+
e.PutUvarint32(arr[i]) // Put base.
10841084
e.PutUvarint64(uint64(i)) // Put idx.
1085-
remaining = (deltaBlockSize - (len(e.B) - startLen) % deltaBlockSize - 1) << 3
1085+
remaining = (deltaBlockSize - (len(e.B)-startLen)%deltaBlockSize - 1) << 3
10861086
deltas = deltas[:0]
10871087
base = arr[i]
10881088
max = -1
10891089
i += 1
10901090
for i < len(arr) {
10911091
delta := arr[i] - base
10921092
cur := bits.Len32(delta)
1093-
if remaining - cur * (len(deltas) + 1) - (((bits.Len(uint(len(deltas))) >> 3) + 1) << 3) >= 0 {
1093+
if remaining-cur*(len(deltas)+1)-(((bits.Len(uint(len(deltas)))>>3)+1)<<3) >= 0 {
10941094
deltas = append(deltas, delta)
10951095
max = cur
10961096
} else {
@@ -1119,8 +1119,129 @@ func writeBaseDeltaBlockPostings(e *encoding.Encbuf, arr []uint32) {
11191119
e.PutBits(uint64(0), remaining)
11201120
}
11211121
e.Count = 0
1122-
1122+
11231123
// There can be one more extra 0.
11241124
e.B = e.B[:len(e.B)-(len(e.B)-startLen)%deltaBlockSize]
11251125
}
11261126
}
1127+
1128+
// bitmapBits is the number of low-order bits of a posting that are addressed
// inside one bitmap block; the remaining high-order bits form the block key.
// Resulting bitmap size per block:
// 8 bits -> 256/8 = 32 bytes, 12 bits -> 4096/8 = 512 bytes,
// 16 bits -> 65536/8 = 8192 bytes.
const bitmapBits = 8

// bitmapPostings iterates over postings stored as a sequence of fixed-size
// bitmap blocks.
//
// Bitmap block format.
// ┌──────────┬────────┐
// │ key <4b> │ bitmap │
// └──────────┴────────┘
//
// The key is a big-endian uint32. Bit j (0 = most significant) of bitmap
// byte i being set means the value key<<bitmapBits + i*8 + j is present.
type bitmapPostings struct {
	bs         []byte // Unread part of the stream; starts at the block being read.
	cur        uint64 // Value reported by At.
	inside     bool   // Whether key has been loaded for the block at the head of bs.
	idx1       int    // Next bitmap byte to inspect within the current block.
	idx2       int    // Next bit to inspect within that byte (0 = most significant).
	bitmapSize int    // Size of one bitmap in bytes.
	key        uint32 // Key of the current block.
}

// newBitmapPostings returns an iterator over the bitmap blocks in bstream.
// The slice is not copied.
func newBitmapPostings(bstream []byte) *bitmapPostings {
	return &bitmapPostings{bs: bstream, bitmapSize: 1 << (bitmapBits - 3)}
}

// At returns the value the iterator is currently positioned on.
func (it *bitmapPostings) At() uint64 {
	return it.cur
}

// Next advances to the next set bit in the stream and reports whether one
// was found. A trailing fragment shorter than a full block is ignored.
func (it *bitmapPostings) Next() bool {
	for {
		if !it.inside {
			if len(it.bs)-4 < it.bitmapSize {
				return false
			}
			it.key = binary.BigEndian.Uint32(it.bs)
			it.inside = true
		}

		bitmap := it.bs[4 : 4+it.bitmapSize]
		for it.idx1 < it.bitmapSize {
			// Hide the bits of this byte that were already consumed.
			rest := bitmap[it.idx1] & (byte(0xff) >> uint(it.idx2))
			if rest == 0 {
				it.idx1++
				it.idx2 = 0
				continue
			}
			bit := bits.LeadingZeros8(rest)
			// Widen before shifting so the high bits of the key are kept.
			it.cur = uint64(it.key)<<bitmapBits + uint64(it.idx1*8+bit)
			it.idx2 = bit + 1
			if it.idx2 == 8 {
				it.idx1++
				it.idx2 = 0
			}
			return true
		}

		// Current block is exhausted; move on to the following one.
		it.bs = it.bs[it.bitmapSize+4:]
		it.inside = false
		it.idx1 = 0
		it.idx2 = 0
	}
}

// Seek advances the iterator to the first value >= x and reports whether
// such a value exists. If the current value already satisfies that, the
// iterator is left where it is.
func (it *bitmapPostings) Seek(x uint64) bool {
	if it.cur >= x {
		return true
	}

	blockSize := it.bitmapSize + 4
	wantKey := x >> bitmapBits
	// Binary search the remaining blocks for the first key greater than the
	// key x falls into.
	i := sort.Search(len(it.bs)/blockSize, func(i int) bool {
		return uint64(binary.BigEndian.Uint32(it.bs[i*blockSize:])) > wantKey
	})
	// The block just before that one may hold the first value >= x. Only
	// reposition when it is not the block we are already reading, so the
	// in-block position is kept.
	if i > 1 {
		it.bs = it.bs[(i-1)*blockSize:]
		it.inside = false
		it.idx1 = 0
		it.idx2 = 0
	}

	for it.Next() {
		if it.cur >= x {
			return true
		}
	}
	return false
}

// Err always returns nil; iteration over bitmap blocks cannot fail.
func (it *bitmapPostings) Err() error {
	return nil
}
1222+
1223+
func writeBitmapPostings(e *encoding.Encbuf, arr []uint32) {
1224+
key := uint32(0xffffffff)
1225+
bitmapSize := 1 << (bitmapBits - 3)
1226+
mask := uint32((1 << uint(bitmapBits)) - 1)
1227+
var curKey uint32
1228+
var curVal uint32
1229+
var offset int // The starting offset of the bitmap of each block.
1230+
var idx1 int
1231+
var idx2 int
1232+
for _, val := range arr {
1233+
curKey = val >> bitmapBits
1234+
curVal = val & mask
1235+
idx1 = int(curVal) >> 3
1236+
idx2 = int(curVal) % 8
1237+
if curKey != key {
1238+
key = curKey
1239+
e.PutBE32(uint32(key))
1240+
offset = len(e.Get())
1241+
for i := 0; i < bitmapSize; i++ {
1242+
e.PutByte(byte(0))
1243+
}
1244+
}
1245+
e.B[offset+idx1] |= 1 << uint(7-idx2)
1246+
}
1247+
}

0 commit comments

Comments
 (0)