Skip to content

Commit 6ff41cd

Browse files
committed
add more test cases
Signed-off-by: Edoardo Vacchi <[email protected]>
1 parent c666f2e commit 6ff41cd

File tree

1 file changed

+188
-45
lines changed

1 file changed

+188
-45
lines changed

pkg/plugins/scorer/precise_prefix_cache_test.go

Lines changed: 188 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
package scorer_test
22

33
import (
4-
"context"
54
"os"
65
"path/filepath"
76
"testing"
@@ -11,6 +10,7 @@ import (
1110
"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache"
1211
"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
1312
"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents"
13+
"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
1414
"github.com/stretchr/testify/require"
1515
k8stypes "k8s.io/apimachinery/pkg/types"
1616
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
@@ -19,12 +19,21 @@ import (
1919
)
2020

2121
func TestPrefixCacheTracking_Score(t *testing.T) {
22+
d, err := os.Getwd()
23+
require.NoError(t, err)
24+
modelDir := filepath.Join(d, "testdata")
25+
localTokenizerConfig := tokenization.LocalTokenizerConfig{
26+
ModelTokenizerMap: map[string]string{
27+
"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
28+
},
29+
}
30+
2231
testcases := []struct {
2332
name string
2433
pods []types.Pod
2534
request *types.LLMRequest
26-
kvBlockData map[kvblock.Key][]kvblock.PodEntry // KV-blocks to populate in the index
27-
wantScoresByAddress map[string]float64 // Use address as key instead of Pod objects
35+
kvBlockData func(prompt string, model string) map[kvblock.Key][]kvblock.PodEntry
36+
wantScoresByAddress map[string]float64
2837
}{
2938
{
3039
name: "nil request",
@@ -36,9 +45,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
3645
},
3746
},
3847
},
39-
request: nil,
40-
kvBlockData: nil,
41-
wantScoresByAddress: make(map[string]float64), // empty map instead of nil
48+
wantScoresByAddress: map[string]float64{}, // empty map
4249
},
4350
{
4451
name: "empty request body",
@@ -55,11 +62,10 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
5562
TargetModel: "test-model",
5663
Body: nil,
5764
},
58-
kvBlockData: nil, // no kv-blocks in index
59-
wantScoresByAddress: make(map[string]float64), // empty map instead of nil
65+
wantScoresByAddress: map[string]float64{}, // empty map
6066
},
6167
{
62-
name: "test normalized scores with different kv-block hits",
68+
name: "longest prefix scorer (default scorer)",
6369
pods: []types.Pod{
6470
&types.PodMetrics{
6571
Pod: &backend.Pod{
@@ -94,51 +100,184 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
94100
TargetModel: "test-model",
95101
Body: &types.LLMRequestBody{
96102
Completions: &types.CompletionsRequest{
97-
Prompt: "hello world",
103+
Prompt: "Testing prefix cache with multiple blocks of tokens. " +
104+
"First block should be cached across all pods in the test. " +
105+
"Second block will be cached on a subset of pods only. " +
106+
"Third block exists only on the pod with the longest prefix match.",
98107
},
99108
},
100109
},
101-
// Populate kvblock.Index with blocks such that:
102-
// - block1 exists on pod-a only (10 hits for pod-a)
103-
// - block2 exists on pod-a and pod-b (10 more hits for pod-a, 10 for pod-b)
104-
// - block3 exists on all pods (10 more hits each)
105-
// Total: pod-a=30, pod-b=20, pod-c=10 -> normalized to 1.0, 0.5, 0.0
106-
kvBlockData: map[kvblock.Key][]kvblock.PodEntry{
107-
{ModelName: "test-model", ChunkHash: 1}: {
108-
{PodIdentifier: "10.0.0.1:8080"},
110+
kvBlockData: func(prompt, model string) map[kvblock.Key][]kvblock.PodEntry {
111+
testTokenizer, err := tokenization.NewCachedLocalTokenizer(tokenization.LocalTokenizerConfig{
112+
ModelTokenizerMap: map[string]string{
113+
"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
114+
},
115+
})
116+
require.NoError(t, err)
117+
118+
// use the actual tokenizer on the test prompt
119+
tokens, _, err := testTokenizer.Encode(prompt, model)
120+
require.NoError(t, err)
121+
122+
// compute chunk hashes using the default block size
123+
tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
124+
chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
125+
126+
require.GreaterOrEqual(t, len(chunkKeys), 3, "Need at least 3 chunks for test")
127+
128+
// populate kvblock.Index to test longest prefix matching:
129+
// - chunk0 (first chunk): all pods have it (common prefix start)
130+
// - chunk1: pod-a and pod-b have it (pod-c drops off after chunk0)
131+
// - chunk2: only pod-a has it (pod-b drops off after chunk1)
132+
// LongestPrefixScorer uses intersection, so:
133+
// pod-a: 3 chunks (0,1,2) -> score 3
134+
// pod-b: 2 chunks (0,1) -> score 2
135+
// pod-c: 1 chunk (0) -> score 1
136+
// Normalized: (3-1)/(3-1) = 1.0, (2-1)/(3-1) = 0.5, (1-1)/(3-1) = 0.0
137+
138+
return map[kvblock.Key][]kvblock.PodEntry{
139+
{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
140+
{PodIdentifier: "10.0.0.1:8080"},
141+
{PodIdentifier: "10.0.0.2:8080"},
142+
{PodIdentifier: "10.0.0.3:8080"},
143+
},
144+
{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
145+
{PodIdentifier: "10.0.0.1:8080"},
146+
{PodIdentifier: "10.0.0.2:8080"},
147+
},
148+
{ModelName: model, ChunkHash: chunkKeys[2].ChunkHash}: {
149+
{PodIdentifier: "10.0.0.1:8080"},
150+
},
151+
}
152+
},
153+
wantScoresByAddress: map[string]float64{
154+
"10.0.0.1:8080": 1.0, // 3 chunks -> (3-1)/(3-1) = 1.0
155+
"10.0.0.2:8080": 0.5, // 2 chunks -> (2-1)/(3-1) = 0.5
156+
"10.0.0.3:8080": 0.0, // 1 chunk -> (1-1)/(3-1) = 0.0
157+
},
158+
},
159+
{
160+
name: "no cache hits (empty index)",
161+
pods: []types.Pod{
162+
&types.PodMetrics{
163+
Pod: &backend.Pod{
164+
NamespacedName: k8stypes.NamespacedName{Name: "pod-a"},
165+
Address: "10.0.0.1:8080",
166+
},
109167
},
110-
{ModelName: "test-model", ChunkHash: 2}: {
111-
{PodIdentifier: "10.0.0.1:8080"},
112-
{PodIdentifier: "10.0.0.2:8080"},
168+
&types.PodMetrics{
169+
Pod: &backend.Pod{
170+
NamespacedName: k8stypes.NamespacedName{Name: "pod-b"},
171+
Address: "10.0.0.2:8080",
172+
},
113173
},
114-
{ModelName: "test-model", ChunkHash: 3}: {
115-
{PodIdentifier: "10.0.0.1:8080"},
116-
{PodIdentifier: "10.0.0.2:8080"},
117-
{PodIdentifier: "10.0.0.3:8080"},
174+
&types.PodMetrics{
175+
Pod: &backend.Pod{
176+
NamespacedName: k8stypes.NamespacedName{Name: "pod-c"},
177+
Address: "10.0.0.3:8080",
178+
},
179+
},
180+
},
181+
request: &types.LLMRequest{
182+
RequestId: "test-request-3",
183+
TargetModel: "test-model",
184+
Body: &types.LLMRequestBody{
185+
Completions: &types.CompletionsRequest{
186+
Prompt: "This prompt has never been cached before on any pod.",
187+
},
118188
},
119189
},
190+
kvBlockData: nil, // no cached data
120191
wantScoresByAddress: map[string]float64{
121-
"10.0.0.1:8080": 1.0, // 30 hits -> (30-10)/(30-10) = 1.0
122-
"10.0.0.2:8080": 0.5, // 20 hits -> (20-10)/(30-10) = 0.5
123-
"10.0.0.3:8080": 0.0, // 10 hits -> (10-10)/(30-10) = 0.0
192+
// when no pods have any cache hits, all should get equal scores (0.0)
193+
"10.0.0.1:8080": 0.0,
194+
"10.0.0.2:8080": 0.0,
195+
"10.0.0.3:8080": 0.0,
196+
},
197+
},
198+
{
199+
name: "all pods have equal prefix length",
200+
pods: []types.Pod{
201+
&types.PodMetrics{
202+
Pod: &backend.Pod{
203+
NamespacedName: k8stypes.NamespacedName{Name: "pod-a"},
204+
Address: "10.0.0.1:8080",
205+
},
206+
},
207+
&types.PodMetrics{
208+
Pod: &backend.Pod{
209+
NamespacedName: k8stypes.NamespacedName{Name: "pod-b"},
210+
Address: "10.0.0.2:8080",
211+
},
212+
},
213+
&types.PodMetrics{
214+
Pod: &backend.Pod{
215+
NamespacedName: k8stypes.NamespacedName{Name: "pod-c"},
216+
Address: "10.0.0.3:8080",
217+
},
218+
},
219+
},
220+
request: &types.LLMRequest{
221+
RequestId: "test-request-4",
222+
TargetModel: "test-model",
223+
Body: &types.LLMRequestBody{
224+
Completions: &types.CompletionsRequest{
225+
Prompt: "All pods have the same cached prefix for this particular prompt text. " +
226+
"We need to ensure this prompt is long enough to generate at least two token chunks. " +
227+
"This additional text should provide sufficient tokens to meet the minimum requirement.",
228+
},
229+
},
230+
},
231+
kvBlockData: func(prompt, model string) map[kvblock.Key][]kvblock.PodEntry {
232+
testTokenizer, err := tokenization.NewCachedLocalTokenizer(tokenization.LocalTokenizerConfig{
233+
ModelTokenizerMap: map[string]string{
234+
"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
235+
},
236+
})
237+
require.NoError(t, err)
238+
239+
tokens, _, err := testTokenizer.Encode(prompt, model)
240+
require.NoError(t, err)
241+
242+
tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
243+
chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
244+
245+
require.GreaterOrEqual(t, len(chunkKeys), 2, "Need at least 2 chunks for test")
246+
247+
// all pods have the same 2 chunks cached
248+
return map[kvblock.Key][]kvblock.PodEntry{
249+
{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
250+
{PodIdentifier: "10.0.0.1:8080"},
251+
{PodIdentifier: "10.0.0.2:8080"},
252+
{PodIdentifier: "10.0.0.3:8080"},
253+
},
254+
{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
255+
{PodIdentifier: "10.0.0.1:8080"},
256+
{PodIdentifier: "10.0.0.2:8080"},
257+
{PodIdentifier: "10.0.0.3:8080"},
258+
},
259+
}
260+
},
261+
wantScoresByAddress: map[string]float64{
262+
// when all pods have equal cache (minScore == maxScore), the implementation
263+
// returns 1.0 for all pods to avoid division by zero
264+
"10.0.0.1:8080": 1.0,
265+
"10.0.0.2:8080": 1.0,
266+
"10.0.0.3:8080": 1.0,
124267
},
125268
},
126269
}
127270

128271
for _, tt := range testcases {
129272
t.Run(tt.name, func(t *testing.T) {
130-
ctx := context.Background()
131-
d, _ := os.Getwd()
132-
modelDir := filepath.Join(d, "/testdata")
273+
ctx := t.Context()
133274

134275
kvcacheConfig, err := kvcache.NewDefaultConfig()
135-
kvcacheConfig.TokenizersPoolConfig.WorkersCount = 1
136-
//kvcacheConfig.TokenizersPoolConfig.LocalTokenizerConfig.AutoDiscoveryDir = modelDir
137-
kvcacheConfig.TokenizersPoolConfig.LocalTokenizerConfig.ModelTokenizerMap = map[string]string{
138-
"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
276+
kvcacheConfig.TokenizersPoolConfig = &tokenization.Config{
277+
WorkersCount: 1,
278+
MinPrefixOverlapRatio: 0.8,
279+
LocalTokenizerConfig: &localTokenizerConfig,
139280
}
140-
kvcacheConfig.TokenizersPoolConfig.HFTokenizerConfig.Enabled = false
141-
kvcacheConfig.TokenizersPoolConfig.HFTokenizerConfig.TokenizersCacheDir = "./build/tokenizers"
142281
require.NoError(t, err)
143282

144283
prefixCacheScorer, err := scorer.New(ctx, scorer.PrecisePrefixCachePluginConfig{
@@ -148,15 +287,19 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
148287
require.NoError(t, err)
149288
require.NotNil(t, prefixCacheScorer)
150289

151-
// prefill
152-
_ = prefixCacheScorer.Score(ctx, nil, tt.request, tt.pods)
153-
154-
// Populate the kvblock.Index with test data
155-
if tt.kvBlockData != nil {
290+
// populate the kvblock.Index with test data
291+
if tt.kvBlockData != nil && tt.request != nil && tt.request.Body != nil {
156292
kvBlockIndex := prefixCacheScorer.KVBlockIndex()
157-
for key, entries := range tt.kvBlockData {
158-
keys := []kvblock.Key{key}
159-
err := kvBlockIndex.Add(ctx, keys, entries)
293+
var prompt string
294+
if tt.request.Body.Completions != nil {
295+
prompt = tt.request.Body.Completions.Prompt
296+
} else if tt.request.Body.ChatCompletions != nil {
297+
// ChatCompletions seem to be hanging right now
298+
t.Fatalf("Not yet implemented")
299+
}
300+
blockData := tt.kvBlockData(prompt, tt.request.TargetModel)
301+
for key, entries := range blockData {
302+
err := kvBlockIndex.Add(ctx, []kvblock.Key{key}, entries)
160303
require.NoError(t, err)
161304
}
162305
}

0 commit comments

Comments (0)