Skip to content

Commit 4f6a084

Browse files
authored
Merge pull request #10 from C-Pro/feature/long-tails
KV key tail optimization
2 parents 2f51301 + 2c54ed3 commit 4f6a084

File tree

4 files changed

+679
-20
lines changed

4 files changed

+679
-20
lines changed

bench_test.go

+8-2
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ func BenchmarkKVListByPrefix(b *testing.B) {
203203
c := NewKV[string](NewMapCache[string, string]())
204204
keys := make([]string, 100_000)
205205
for i := 0; i < 100_000; i++ {
206-
l := rand.Intn(36)
206+
l := rand.Intn(15)+15
207207
unique := randomString(l)
208208
keys[i] = unique
209209
for j := 0; j < 10; j++ {
@@ -213,6 +213,12 @@ func BenchmarkKVListByPrefix(b *testing.B) {
213213

214214
b.ResetTimer()
215215
for i := 0; i < b.N; i++ {
216-
_, _ = c.ListByPrefix(keys[i%len(keys)])
216+
res, err := c.ListByPrefix(keys[i%len(keys)])
217+
if err != nil {
218+
b.Errorf("unexpected error in ListByPrefix: %v", err)
219+
}
220+
if len(res) != 10 {
221+
b.Errorf("expected len 10, but got %d", len(res))
222+
}
217223
}
218224
}

kv.go

+132-15
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package geche
22

33
import (
4+
"bytes"
45
"sync"
56
)
67

@@ -9,8 +10,8 @@ import (
910
const maxKeyLength = 512
1011

1112
type trieNode struct {
12-
// character
13-
c byte
13+
// Node suffix. A single byte for most nodes, but can be longer for a tail node.
14+
b []byte
1415
// depth level
1516
d int
1617

@@ -33,7 +34,7 @@ type trieNode struct {
3334
func (n *trieNode) addToList(node *trieNode) *trieNode {
3435
curr := n
3536
for {
36-
if node.c < curr.c {
37+
if node.b[0] < curr.b[0] {
3738
node.prev = curr.prev
3839
node.next = curr
3940
curr.prev = node
@@ -68,7 +69,7 @@ func (n *trieNode) addToList(node *trieNode) *trieNode {
6869
func (n *trieNode) removeFromList(c byte) (*trieNode, bool) {
6970
curr := n
7071
for {
71-
if curr.c == c {
72+
if curr.b[0] == c {
7273
if curr.prev != nil {
7374
curr.prev.next = curr.next
7475
}
@@ -125,21 +126,22 @@ func (kv *KV[V]) Set(key string, value V) {
125126
return
126127
}
127128

129+
keyb := []byte(key)
128130
node := kv.trie
129-
for i := 0; i < len(key); i++ {
131+
for len(keyb) > 0 {
130132
if node.down == nil {
131133
// Creating new level.
132134
node.down = make(map[byte]*trieNode)
133135
}
134136

135-
next := node.down[key[i]]
137+
next := node.down[keyb[0]]
136138
if next == nil {
137139
// Creating new node.
138140
next = &trieNode{
139-
c: key[i],
141+
b: keyb,
140142
d: node.d + 1,
141143
}
142-
node.down[key[i]] = next
144+
node.down[keyb[0]] = next
143145
if node.nextLevelHead == nil {
144146
node.nextLevelHead = next
145147
} else {
@@ -149,15 +151,101 @@ func (kv *KV[V]) Set(key string, value V) {
149151
node.nextLevelHead = head
150152
}
151153
}
154+
} else if len(next.b) == 1 {
155+
// Single byte nodes are a simple case.
156+
} else {
157+
// Multi byte nodes require splitting.
158+
159+
// Removing node from the linked list.
160+
head, empty := node.nextLevelHead.removeFromList(keyb[0])
161+
if empty {
162+
node.nextLevelHead = nil
163+
} else if head != nil {
164+
node.nextLevelHead = head
165+
}
166+
167+
commonPrefixLen := commonPrefixLen(keyb, next.b)
168+
for i := 0; i < commonPrefixLen; i++ {
169+
// Creating new single-byte node.
170+
newNode := &trieNode{
171+
b: []byte{keyb[i]},
172+
d: node.d + 1,
173+
down: make(map[byte]*trieNode),
174+
}
175+
node.down[keyb[i]] = newNode
176+
if node.nextLevelHead == nil {
177+
node.nextLevelHead = newNode
178+
} else {
179+
head := node.nextLevelHead.addToList(newNode)
180+
if head != nil {
181+
node.nextLevelHead = head
182+
}
183+
}
184+
185+
node = newNode
186+
}
187+
188+
if (bytes.Equal(next.b, keyb[:commonPrefixLen]) && next.terminal) || len(keyb) == commonPrefixLen {
189+
// If last node is end of key, or end of the node we are splitting, mark it as terminal.
190+
node.terminal = true
191+
}
192+
193+
// Adding removed node back.
194+
if len(next.b) > commonPrefixLen {
195+
// Creating new suffix (potentially multi-byte) node.
196+
newNode := &trieNode{
197+
b: next.b[commonPrefixLen:],
198+
d: node.d + 1,
199+
terminal: true,
200+
}
201+
node.down[next.b[commonPrefixLen]] = newNode
202+
node.nextLevelHead = newNode
203+
}
204+
205+
// Adding new tail node.
206+
if len(keyb) > commonPrefixLen {
207+
// Creating new suffix (potentially multi-byte) node.
208+
newNode := &trieNode{
209+
b: keyb[commonPrefixLen:],
210+
d: node.d + 1,
211+
terminal: true,
212+
}
213+
node.down[keyb[commonPrefixLen]] = newNode
214+
if node.nextLevelHead == nil {
215+
node.nextLevelHead = newNode
216+
} else {
217+
head := node.nextLevelHead.addToList(newNode)
218+
if head != nil {
219+
node.nextLevelHead = head
220+
}
221+
}
222+
}
223+
224+
// keyb = keyb[commonPrefixLen:]
225+
// continue
226+
return
152227
}
153228

229+
keyb = keyb[commonPrefixLen(keyb, next.b):]
154230
node = next
155231
}
156232

157233
node.terminal = true
158234
}
159235

160-
// DFS starts with last node of the key prefix.
236+
// commonPrefixLen returns the number of leading bytes that a and b share.
//
// The result is in the range [0, min(len(a), len(b))]. It is 0 when either
// slice is empty (or nil) or when the first bytes differ, and it equals the
// length of the shorter slice when that slice is a prefix of the other.
func commonPrefixLen(a, b []byte) int {
	// Only the overlapping part of the two slices can match, so compute the
	// bound once instead of re-checking both lengths on every iteration.
	limit := len(a)
	if len(b) < limit {
		limit = len(b)
	}

	for i := 0; i < limit; i++ {
		if a[i] != b[i] {
			// First mismatch: everything before index i is common.
			return i
		}
	}

	// No mismatch within the overlap: the shorter slice is a prefix of the
	// longer one (or the slices are equal).
	return limit
}
246+
247+
// dfs runs a depth-first search that starts at the last node of the key prefix and traverses the trie,
248+
// appending all terminal nodes to the result.
161249
func (kv *KV[V]) dfs(node *trieNode, prefix []byte) ([]V, error) {
162250
res := []V{}
163251
key := make([]byte, len(prefix), maxKeyLength)
@@ -177,6 +265,7 @@ func (kv *KV[V]) dfs(node *trieNode, prefix []byte) ([]V, error) {
177265
return res, nil
178266
}
179267

268+
// Instead of a recursive DFS, we use a stack-based approach.
180269
stack := make([]*trieNode, 0, maxKeyLength)
181270
stack = append(stack, node.nextLevelHead)
182271
var (
@@ -190,18 +279,26 @@ func (kv *KV[V]) dfs(node *trieNode, prefix []byte) ([]V, error) {
190279
break
191280
}
192281

282+
// Pop the top node from the stack.
193283
top = stack[len(stack)-1]
194284
stack = stack[:len(stack)-1]
195285

196286
if top.d > prevDepth {
197287
// We have descended to the next level.
198-
key = append(key, top.c)
288+
key = append(key, top.b...)
199289
} else if top.d < prevDepth {
200290
// We have ascended to the previous level.
201-
key = key[:len(key)-(prevDepth-top.d)]
202-
key[len(key)-1] = top.c
291+
key = key[:top.d]
292+
key[len(key)-1] = top.b[0]
293+
if len(top.b) > 1 {
294+
key = append(key, top.b[1:]...)
295+
}
203296
} else {
204-
key[len(key)-1] = top.c
297+
key = key[:top.d]
298+
key[len(key)-1] = top.b[0]
299+
if len(top.b) > 1 {
300+
key = append(key, top.b[1:]...)
301+
}
205302
}
206303
prevDepth = top.d
207304

@@ -237,6 +334,14 @@ func (kv *KV[V]) ListByPrefix(prefix string) ([]V, error) {
237334
if next == nil {
238335
return nil, nil
239336
}
337+
// If we reached a multibyte tail node, we can return its value,
338+
// since tail nodes have no descendants.
339+
if len(next.b) > 1 && len(next.b) >= len(prefix)-i {
340+
if bytes.Equal(next.b[:len(prefix)-i], []byte(prefix)[i:]) {
341+
v, err := kv.data.Get(prefix + string(next.b[len(prefix)-i:]))
342+
return []V{v}, err
343+
}
344+
}
240345
node = next
241346
}
242347

@@ -255,6 +360,7 @@ func (kv *KV[V]) Del(key string) error {
255360

256361
node := kv.trie
257362
stack := []*trieNode{}
363+
found := false
258364
for i := 0; i < len(key); i++ {
259365
next := node.down[key[i]]
260366
if next == nil {
@@ -264,20 +370,31 @@ func (kv *KV[V]) Del(key string) error {
264370

265371
stack = append(stack, node)
266372
node = next
373+
if bytes.Equal(node.b, []byte(key)[i:]) {
374+
if node.terminal {
375+
found = true
376+
}
377+
break
378+
}
267379
}
268380

381+
if !found {
382+
// If we are here, the key does not exist.
383+
return kv.data.Del(key)
384+
}
385+
269386
node.terminal = false
270387

271388
// Go back the stack removing nodes with no descendants.
272389
for i := len(stack) - 1; i >= 0; i-- {
273390
prev := stack[i]
274391
stack = stack[:i]
275392
if node.nextLevelHead == nil {
276-
head, empty := prev.nextLevelHead.removeFromList(node.c)
393+
head, empty := prev.nextLevelHead.removeFromList(node.b[0])
277394
if head != nil || (head == nil && empty) {
278395
prev.nextLevelHead = head
279396
}
280-
delete(prev.down, node.c)
397+
delete(prev.down, node.b[0])
281398
}
282399

283400
if prev.terminal || len(prev.down) > 0 && prev == kv.trie {

0 commit comments

Comments
 (0)