Skip to content

Commit

Permalink
cleanup: removes AndToSuperset, OrToSuperset, AndNotToSuperset methods
Browse files Browse the repository at this point in the history
  • Loading branch information
aliszka committed Jan 2, 2025
1 parent db43366 commit 95285c5
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 339 deletions.
175 changes: 0 additions & 175 deletions bitmap_opt.go
Original file line number Diff line number Diff line change
Expand Up @@ -785,79 +785,6 @@ func (b containerBufs) getOrNil(i int) []uint16 {
return b[i]
}

// minContainersForConcurrency is the per-worker threshold consulted by
// concurrentlyOnRange: a range is split across goroutines only when it holds
// at least conc*minContainersForConcurrency entries; smaller ranges are
// processed by a single sequential callback.
const minContainersForConcurrency = 16

// AndToSuperset computes the intersection of dst and src, storing the result
// in dst. Containers already present in dst are reused, and the supplied
// container buffers are handed to the per-range merge workers.
//
// The number of buffers passed determines the concurrency level
// (e.g. 4 buffers = the merge is performed by up to 4 goroutines, each worker
// using the buffer matching its index). At least one buffer is required;
// this is asserted.
//
// A nil src is treated as an empty bitmap: every container of dst is zeroed
// out.
//
// CAUTION: use only when dst, before the call, already contained every
// element present in src.
func (dst *Bitmap) AndToSuperset(src *Bitmap, containerBufs ...[]uint16) {
	workers := len(containerBufs)
	assert(workers > 0)

	dstKeyCount := dst.keys.numKeys()
	if src != nil {
		srcKeyCount := src.keys.numKeys()
		concurrentlyOnRange(workers, dstKeyCount, func(worker, lo, hi int) {
			andSelectedContainers(dst, src, lo, hi, 0, srcKeyCount, containerBufs[worker])
		})
		return
	}

	// Nothing to intersect with: clear the whole destination.
	concurrentlyOnRange(workers, dstKeyCount, func(_, lo, hi int) {
		zeroOutSelectedContainers(dst, lo, hi)
	})
}

// OrToSuperset computes the union of dst and src, storing the result in dst.
// Containers already present in dst are reused, and the supplied container
// buffers are handed to the per-range merge workers.
//
// The number of buffers passed determines the concurrency level
// (e.g. 4 buffers = the merge is performed by up to 4 goroutines, each worker
// using the buffer matching its index). At least one buffer is required;
// this is asserted.
//
// A nil src leaves dst untouched.
//
// CAUTION: use only when dst, before the call, already contained every
// element present in src.
func (dst *Bitmap) OrToSuperset(src *Bitmap, containerBufs ...[]uint16) {
	workers := len(containerBufs)
	assert(workers > 0)

	if src != nil {
		// The work is partitioned over the keys of src, not dst.
		srcKeyCount := src.keys.numKeys()
		concurrentlyOnRange(workers, srcKeyCount, func(worker, lo, hi int) {
			orSelectedContainers(dst, src, lo, hi, containerBufs[worker])
		})
	}
}

// AndNotToSuperset computes the difference dst \ src, storing the result in
// dst. Containers already present in dst are reused, and the supplied
// container buffers are handed to the per-range merge workers.
//
// The number of buffers passed determines the concurrency level
// (e.g. 4 buffers = the merge is performed by up to 4 goroutines, each worker
// using the buffer matching its index). At least one buffer is required;
// this is asserted.
//
// A nil src leaves dst untouched.
//
// CAUTION: use only when dst, before the call, already contained every
// element present in src.
func (dst *Bitmap) AndNotToSuperset(src *Bitmap, containerBufs ...[]uint16) {
	workers := len(containerBufs)
	assert(workers > 0)

	if src != nil {
		dstKeyCount, srcKeyCount := dst.keys.numKeys(), src.keys.numKeys()
		concurrentlyOnRange(workers, dstKeyCount, func(worker, lo, hi int) {
			andNotSelectedContainers(dst, src, lo, hi, 0, srcKeyCount, containerBufs[worker])
		})
	}
}

func (ra *Bitmap) ConvertToBitmapContainers() {
for ai, an := 0, ra.keys.numKeys(); ai < an; ai++ {
ak := ra.keys.key(ai)
Expand All @@ -873,108 +800,6 @@ func (ra *Bitmap) ConvertToBitmapContainers() {
}
}

// concurrentlyOnRange splits the index range [0, max) into conc contiguous
// chunks and invokes callback once per chunk as callback(i, from, to), where
// i is the chunk index and to is exclusive.
//
// Chunks 0..conc-2 each run in their own goroutine; the last chunk, which
// also absorbs the remainder of max/conc, runs on the calling goroutine.
// The function returns only after every callback has returned.
//
// The whole range is handled by a single callback(0, 0, max) call on the
// calling goroutine when conc is 1 or less, or when max is smaller than
// conc*minContainersForConcurrency.
func concurrentlyOnRange(conc, max int, callback func(i, from, to int)) {
	// conc <= 1 (rather than == 1) also covers zero and negative values, which
	// would otherwise divide by zero below or give the WaitGroup a negative
	// counter.
	if conc <= 1 || max < conc*minContainersForConcurrency {
		callback(0, 0, max)
		return
	}

	delta := max / conc

	var wg sync.WaitGroup
	wg.Add(conc - 1)
	for i := 0; i < conc-1; i++ {
		go func(i int) {
			// Deferred so the counter is released however callback exits.
			defer wg.Done()
			callback(i, delta*i, delta*(i+1))
		}(i)
	}
	// The final chunk runs inline and extends to max to pick up the remainder.
	callback(conc-1, delta*(conc-1), max)
	wg.Wait()
}

// zeroOutSelectedContainers zeroes out the containers of a whose key
// positions lie in the half-open range [ai, an).
func zeroOutSelectedContainers(a *Bitmap, ai, an int) {
	for pos := ai; pos < an; pos++ {
		container := a.getContainer(a.keys.val(pos))
		zeroOutContainer(container)
	}
}

// andSelectedContainers intersects, in place, the containers of a at key
// positions [ai, an) with the containers of b at key positions [bi, bn).
//
// Both key ranges are walked in lockstep:
//   - a key present in both bitmaps has its a-container intersected with the
//     b-container (or simply zeroed out when the b-container is empty);
//   - a key present only in a has its container zeroed out;
//   - a key present only in b is skipped.
//
// Any a-containers left once b is exhausted are zeroed out as well.
// containerBuf is forwarded to containerAndToSuperset.
func andSelectedContainers(a, b *Bitmap, ai, an, bi, bn int, containerBuf []uint16) {
	for ai < an && bi < bn {
		ak, bk := a.keys.key(ai), b.keys.key(bi)
		switch {
		case ak == bk:
			ac := a.getContainer(a.keys.val(ai))
			bc := b.getContainer(b.keys.val(bi))
			if getCardinality(bc) != 0 {
				containerAndToSuperset(ac, bc, containerBuf)
			} else {
				zeroOutContainer(ac)
			}
			ai++
			bi++
		case ak < bk:
			// Key missing from b: nothing survives the intersection.
			zeroOutContainer(a.getContainer(a.keys.val(ai)))
			ai++
		default:
			// Key missing from a: nothing to do for it.
			bi++
		}
	}

	// b ran out first; the remaining a-containers have no counterpart.
	for ai < an {
		zeroOutContainer(a.getContainer(a.keys.val(ai)))
		ai++
	}
}

// orSelectedContainers merges, in place, the containers of b at key positions
// [bi, bn) into the containers of a that share the same key.
//
// Empty b-containers are skipped. For every non-empty b-container the
// matching key must already exist in a; otherwise the function panics.
// containerBuf is forwarded to containerOrToSuperset.
func orSelectedContainers(a, b *Bitmap, bi, bn int, containerBuf []uint16) {
	for pos := bi; pos < bn; pos++ {
		bc := b.getContainer(b.keys.val(pos))
		if getCardinality(bc) == 0 {
			continue
		}

		bk := b.keys.key(pos)
		ai := a.keys.search(bk)
		if ai < a.keys.numKeys() && a.keys.key(ai) == bk {
			// Container exists in dst as well: union into it directly.
			ac := a.getContainer(a.keys.val(ai))
			containerOrToSuperset(ac, bc, containerBuf)
			continue
		}

		// Container does not exist in dst.
		panic("Current bitmap should have all containers of incoming bitmap")
	}
}

// andNotSelectedContainers subtracts, in place, the containers of b at key
// positions [bi, bn) from the containers of a at key positions [ai, an).
//
// Both key ranges are walked in lockstep. Only keys present in both bitmaps
// cause any work, and only when the b-container is non-empty; keys found in
// just one of the bitmaps are stepped over. containerBuf is forwarded to
// containerAndNotToSuperset.
func andNotSelectedContainers(a, b *Bitmap, ai, an, bi, bn int, containerBuf []uint16) {
	for ai < an && bi < bn {
		ak, bk := a.keys.key(ai), b.keys.key(bi)
		switch {
		case ak == bk:
			if bc := b.getContainer(b.keys.val(bi)); getCardinality(bc) != 0 {
				ac := a.getContainer(a.keys.val(ai))
				containerAndNotToSuperset(ac, bc, containerBuf)
			}
			ai++
			bi++
		case ak < bk:
			ai++
		default:
			bi++
		}
	}
}

func (dst *Bitmap) CompareNumKeys(src *Bitmap) int {
if dst == nil && src == nil {
return 0
Expand Down
34 changes: 17 additions & 17 deletions bitmap_opt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2251,7 +2251,7 @@ func TestPrefillUtils(t *testing.T) {
})
}

func TestMergeToSuperset(t *testing.T) {
func TestMergeConcurrentlyWithBuffers(t *testing.T) {
run := func(t *testing.T, bufs [][]uint16) {
containerThreshold := uint64(math.MaxUint16 + 1)

Expand Down Expand Up @@ -2317,47 +2317,47 @@ func TestMergeToSuperset(t *testing.T) {

t.Run("and", func(t *testing.T) {
control.And(and)
superset.AndToSuperset(and, bufs...)
superset.AndConcBuf(and, bufs...)

require.Equal(t, 11389, superset.GetCardinality())
require.ElementsMatch(t, control.ToArray(), superset.ToArray())
})

t.Run("or", func(t *testing.T) {
control.Or(or)
superset.OrToSuperset(or, bufs...)
superset.OrConcBuf(or, bufs...)

require.Equal(t, 22750, superset.GetCardinality())
require.ElementsMatch(t, control.ToArray(), superset.ToArray())
})

t.Run("and not", func(t *testing.T) {
control.AndNot(andNot)
superset.AndNotToSuperset(andNot, bufs...)
superset.AndNotConcBuf(andNot, bufs...)

require.Equal(t, 9911, superset.GetCardinality())
require.ElementsMatch(t, control.ToArray(), superset.ToArray())
})

t.Run("2nd or", func(t *testing.T) {
control.Or(or)
superset.OrToSuperset(or, bufs...)
superset.OrConcBuf(or, bufs...)

require.Equal(t, 20730, superset.GetCardinality())
require.ElementsMatch(t, control.ToArray(), superset.ToArray())
})

t.Run("2nd and", func(t *testing.T) {
control.And(and)
superset.AndToSuperset(and, bufs...)
superset.AndConcBuf(and, bufs...)

require.Equal(t, 10369, superset.GetCardinality())
require.ElementsMatch(t, control.ToArray(), superset.ToArray())
})

t.Run("2nd and not", func(t *testing.T) {
control.AndNot(andNot)
superset.AndNotToSuperset(andNot, bufs...)
superset.AndNotConcBuf(andNot, bufs...)

require.Equal(t, 5520, superset.GetCardinality())
require.ElementsMatch(t, control.ToArray(), superset.ToArray())
Expand Down Expand Up @@ -2408,8 +2408,8 @@ func TestMergeToSuperset(t *testing.T) {
})
}

// go test -v -fuzz FuzzMergeToSuperset -fuzztime 600s -run ^$ github.com/weaviate/sroar
func FuzzMergeToSuperset(f *testing.F) {
// go test -v -fuzz FuzzMergeConcurrentlyWithBuffers -fuzztime 600s -run ^$ github.com/weaviate/sroar
func FuzzMergeConcurrentlyWithBuffers(f *testing.F) {
type testCase struct {
name string
countElements int
Expand Down Expand Up @@ -2474,20 +2474,20 @@ func FuzzMergeToSuperset(f *testing.F) {
f.Add(tc.countElements, tc.countSubsets, tc.countMerges, tc.countBuffers, tc.randSeed)
}

f.Fuzz(runMergeToSuperSetTest)
f.Fuzz(runMergeConcurrentlyWithBuffersTest)
}

func TestMergeToSuperset_VerifyFuzzCallback(t *testing.T) {
func TestMergeConcurrentlyWithBuffers_VerifyFuzzCallback(t *testing.T) {
t.Run("single buffer", func(t *testing.T) {
runMergeToSuperSetTest(t, 23_456, 17, 9, 1, 1724861525311)
runMergeConcurrentlyWithBuffersTest(t, 23_456, 17, 9, 1, 1724861525311)
})

t.Run("multiple buffers (concurrent)", func(t *testing.T) {
runMergeToSuperSetTest(t, 23_456, 17, 9, 4, 1724861525311)
runMergeConcurrentlyWithBuffersTest(t, 23_456, 17, 9, 4, 1724861525311)
})
}

func runMergeToSuperSetTest(t *testing.T,
func runMergeConcurrentlyWithBuffersTest(t *testing.T,
countElements, countSubsets, countMerges, countBuffers int, randSeed int64,
) {
if countElements < 100 || countElements > 50_000 {
Expand Down Expand Up @@ -2541,19 +2541,19 @@ func runMergeToSuperSetTest(t *testing.T,
switch mergeType := rnd.Intn(3); mergeType {
case 1:
t.Run(fmt.Sprintf("AND with %d", id), func(t *testing.T) {
superset.AndToSuperset(subset, buffers...)
superset.AndConcBuf(subset, buffers...)
control.And(subset)
assertMatches(t, superset, control)
})
case 2:
t.Run(fmt.Sprintf("AND NOT with %d", id), func(t *testing.T) {
superset.AndNotToSuperset(subset, buffers...)
superset.AndNotConcBuf(subset, buffers...)
control.AndNot(subset)
assertMatches(t, superset, control)
})
default:
t.Run(fmt.Sprintf("OR with %d", id), func(t *testing.T) {
superset.OrToSuperset(subset, buffers...)
superset.OrConcBuf(subset, buffers...)
control.Or(subset)
assertMatches(t, superset, control)
})
Expand Down
Loading

0 comments on commit 95285c5

Please sign in to comment.