11package scorer_test
22
33import (
4- "context"
54 "os"
65 "path/filepath"
76 "testing"
@@ -11,6 +10,7 @@ import (
1110 "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache"
1211 "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
1312 "github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents"
13+ "github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
1414 "github.com/stretchr/testify/require"
1515 k8stypes "k8s.io/apimachinery/pkg/types"
1616 "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
@@ -19,12 +19,21 @@ import (
1919)
2020
2121func TestPrefixCacheTracking_Score (t * testing.T ) {
22+ d , err := os .Getwd ()
23+ require .NoError (t , err )
24+ modelDir := filepath .Join (d , "testdata" )
25+ localTokenizerConfig := tokenization.LocalTokenizerConfig {
26+ ModelTokenizerMap : map [string ]string {
27+ "test-model" : filepath .Join (modelDir , "test-model/tokenizer.json" ),
28+ },
29+ }
30+
2231 testcases := []struct {
2332 name string
2433 pods []types.Pod
2534 request * types.LLMRequest
26- kvBlockData map [kvblock.Key ][]kvblock.PodEntry // KV-blocks to populate in the index
27- wantScoresByAddress map [string ]float64 // Use address as key instead of Pod objects
35+ kvBlockData func ( prompt string , model string ) map [kvblock.Key ][]kvblock.PodEntry
36+ wantScoresByAddress map [string ]float64
2837 }{
2938 {
3039 name : "nil request" ,
@@ -36,9 +45,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
3645 },
3746 },
3847 },
39- request : nil ,
40- kvBlockData : nil ,
41- wantScoresByAddress : make (map [string ]float64 ), // empty map instead of nil
48+ wantScoresByAddress : map [string ]float64 {}, // empty map
4249 },
4350 {
4451 name : "empty request body" ,
@@ -55,11 +62,10 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
5562 TargetModel : "test-model" ,
5663 Body : nil ,
5764 },
58- kvBlockData : nil , // no kv-blocks in index
59- wantScoresByAddress : make (map [string ]float64 ), // empty map instead of nil
65+ wantScoresByAddress : map [string ]float64 {}, // empty map
6066 },
6167 {
62- name : "test normalized scores with different kv-block hits " ,
68+ name : "longest prefix scorer (default scorer) " ,
6369 pods : []types.Pod {
6470 & types.PodMetrics {
6571 Pod : & backend.Pod {
@@ -94,51 +100,184 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
94100 TargetModel : "test-model" ,
95101 Body : & types.LLMRequestBody {
96102 Completions : & types.CompletionsRequest {
97- Prompt : "hello world" ,
103+ Prompt : "Testing prefix cache with multiple blocks of tokens. " +
104+ "First block should be cached across all pods in the test. " +
105+ "Second block will be cached on a subset of pods only. " +
106+ "Third block exists only on the pod with the longest prefix match." ,
98107 },
99108 },
100109 },
101- // Populate kvblock.Index with blocks such that:
102- // - block1 exists on pod-a only (10 hits for pod-a)
103- // - block2 exists on pod-a and pod-b (10 more hits for pod-a, 10 for pod-b)
104- // - block3 exists on all pods (10 more hits each)
105- // Total: pod-a=30, pod-b=20, pod-c=10 -> normalized to 1.0, 0.5, 0.0
106- kvBlockData : map [kvblock.Key ][]kvblock.PodEntry {
107- {ModelName : "test-model" , ChunkHash : 1 }: {
108- {PodIdentifier : "10.0.0.1:8080" },
110+ kvBlockData : func (prompt , model string ) map [kvblock.Key ][]kvblock.PodEntry {
111+ testTokenizer , err := tokenization .NewCachedLocalTokenizer (tokenization.LocalTokenizerConfig {
112+ ModelTokenizerMap : map [string ]string {
113+ "test-model" : filepath .Join (modelDir , "test-model/tokenizer.json" ),
114+ },
115+ })
116+ require .NoError (t , err )
117+
118+ // use the actual tokenizer on the test prompt
119+ tokens , _ , err := testTokenizer .Encode (prompt , model )
120+ require .NoError (t , err )
121+
122+ // compute chunk hashes using the default block size
123+ tokenProcessor := kvblock .NewChunkedTokenDatabase (kvblock .DefaultTokenProcessorConfig ())
124+ chunkKeys := tokenProcessor .TokensToKVBlockKeys (tokens , model )
125+
126+ require .GreaterOrEqual (t , len (chunkKeys ), 3 , "Need at least 3 chunks for test" )
127+
128+ // populate kvblock.Index to test longest prefix matching:
129+ // - chunk0 (first chunk): all pods have it (common prefix start)
130+ // - chunk1: pod-a and pod-b have it (pod-c drops off after chunk0)
131+ // - chunk2: only pod-a has it (pod-b drops off after chunk1)
132+ // LongestPrefixScorer uses intersection, so:
133+ // pod-a: 3 chunks (0,1,2) -> score 3
134+ // pod-b: 2 chunks (0,1) -> score 2
135+ // pod-c: 1 chunk (0) -> score 1
136+ // Normalized: (3-1)/(3-1) = 1.0, (2-1)/(3-1) = 0.5, (1-1)/(3-1) = 0.0
137+
138+ return map [kvblock.Key ][]kvblock.PodEntry {
139+ {ModelName : model , ChunkHash : chunkKeys [0 ].ChunkHash }: {
140+ {PodIdentifier : "10.0.0.1:8080" },
141+ {PodIdentifier : "10.0.0.2:8080" },
142+ {PodIdentifier : "10.0.0.3:8080" },
143+ },
144+ {ModelName : model , ChunkHash : chunkKeys [1 ].ChunkHash }: {
145+ {PodIdentifier : "10.0.0.1:8080" },
146+ {PodIdentifier : "10.0.0.2:8080" },
147+ },
148+ {ModelName : model , ChunkHash : chunkKeys [2 ].ChunkHash }: {
149+ {PodIdentifier : "10.0.0.1:8080" },
150+ },
151+ }
152+ },
153+ wantScoresByAddress : map [string ]float64 {
154+ "10.0.0.1:8080" : 1.0 , // 3 chunks -> (3-1)/(3-1) = 1.0
155+ "10.0.0.2:8080" : 0.5 , // 2 chunks -> (2-1)/(3-1) = 0.5
156+ "10.0.0.3:8080" : 0.0 , // 1 chunk -> (1-1)/(3-1) = 0.0
157+ },
158+ },
159+ {
160+ name : "no cache hits (empty index)" ,
161+ pods : []types.Pod {
162+ & types.PodMetrics {
163+ Pod : & backend.Pod {
164+ NamespacedName : k8stypes.NamespacedName {Name : "pod-a" },
165+ Address : "10.0.0.1:8080" ,
166+ },
109167 },
110- {ModelName : "test-model" , ChunkHash : 2 }: {
111- {PodIdentifier : "10.0.0.1:8080" },
112- {PodIdentifier : "10.0.0.2:8080" },
168+ & types.PodMetrics {
169+ Pod : & backend.Pod {
170+ NamespacedName : k8stypes.NamespacedName {Name : "pod-b" },
171+ Address : "10.0.0.2:8080" ,
172+ },
113173 },
114- {ModelName : "test-model" , ChunkHash : 3 }: {
115- {PodIdentifier : "10.0.0.1:8080" },
116- {PodIdentifier : "10.0.0.2:8080" },
117- {PodIdentifier : "10.0.0.3:8080" },
174+ & types.PodMetrics {
175+ Pod : & backend.Pod {
176+ NamespacedName : k8stypes.NamespacedName {Name : "pod-c" },
177+ Address : "10.0.0.3:8080" ,
178+ },
179+ },
180+ },
181+ request : & types.LLMRequest {
182+ RequestId : "test-request-3" ,
183+ TargetModel : "test-model" ,
184+ Body : & types.LLMRequestBody {
185+ Completions : & types.CompletionsRequest {
186+ Prompt : "This prompt has never been cached before on any pod." ,
187+ },
118188 },
119189 },
190+ kvBlockData : nil , // no cached data
120191 wantScoresByAddress : map [string ]float64 {
121- "10.0.0.1:8080" : 1.0 , // 30 hits -> (30-10)/(30-10) = 1.0
122- "10.0.0.2:8080" : 0.5 , // 20 hits -> (20-10)/(30-10) = 0.5
123- "10.0.0.3:8080" : 0.0 , // 10 hits -> (10-10)/(30-10) = 0.0
192+ // when no pods have any cache hits, all should get equal scores (0.0)
193+ "10.0.0.1:8080" : 0.0 ,
194+ "10.0.0.2:8080" : 0.0 ,
195+ "10.0.0.3:8080" : 0.0 ,
196+ },
197+ },
198+ {
199+ name : "all pods have equal prefix length" ,
200+ pods : []types.Pod {
201+ & types.PodMetrics {
202+ Pod : & backend.Pod {
203+ NamespacedName : k8stypes.NamespacedName {Name : "pod-a" },
204+ Address : "10.0.0.1:8080" ,
205+ },
206+ },
207+ & types.PodMetrics {
208+ Pod : & backend.Pod {
209+ NamespacedName : k8stypes.NamespacedName {Name : "pod-b" },
210+ Address : "10.0.0.2:8080" ,
211+ },
212+ },
213+ & types.PodMetrics {
214+ Pod : & backend.Pod {
215+ NamespacedName : k8stypes.NamespacedName {Name : "pod-c" },
216+ Address : "10.0.0.3:8080" ,
217+ },
218+ },
219+ },
220+ request : & types.LLMRequest {
221+ RequestId : "test-request-4" ,
222+ TargetModel : "test-model" ,
223+ Body : & types.LLMRequestBody {
224+ Completions : & types.CompletionsRequest {
225+ Prompt : "All pods have the same cached prefix for this particular prompt text. " +
226+ "We need to ensure this prompt is long enough to generate at least two token chunks. " +
227+ "This additional text should provide sufficient tokens to meet the minimum requirement." ,
228+ },
229+ },
230+ },
231+ kvBlockData : func (prompt , model string ) map [kvblock.Key ][]kvblock.PodEntry {
232+ testTokenizer , err := tokenization .NewCachedLocalTokenizer (tokenization.LocalTokenizerConfig {
233+ ModelTokenizerMap : map [string ]string {
234+ "test-model" : filepath .Join (modelDir , "test-model/tokenizer.json" ),
235+ },
236+ })
237+ require .NoError (t , err )
238+
239+ tokens , _ , err := testTokenizer .Encode (prompt , model )
240+ require .NoError (t , err )
241+
242+ tokenProcessor := kvblock .NewChunkedTokenDatabase (kvblock .DefaultTokenProcessorConfig ())
243+ chunkKeys := tokenProcessor .TokensToKVBlockKeys (tokens , model )
244+
245+ require .GreaterOrEqual (t , len (chunkKeys ), 2 , "Need at least 2 chunks for test" )
246+
247+ // all pods have the same 2 chunks cached
248+ return map [kvblock.Key ][]kvblock.PodEntry {
249+ {ModelName : model , ChunkHash : chunkKeys [0 ].ChunkHash }: {
250+ {PodIdentifier : "10.0.0.1:8080" },
251+ {PodIdentifier : "10.0.0.2:8080" },
252+ {PodIdentifier : "10.0.0.3:8080" },
253+ },
254+ {ModelName : model , ChunkHash : chunkKeys [1 ].ChunkHash }: {
255+ {PodIdentifier : "10.0.0.1:8080" },
256+ {PodIdentifier : "10.0.0.2:8080" },
257+ {PodIdentifier : "10.0.0.3:8080" },
258+ },
259+ }
260+ },
261+ wantScoresByAddress : map [string ]float64 {
262+ // when all pods have equal cache (minScore == maxScore), the implementation
263+ // returns 1.0 for all pods to avoid division by zero
264+ "10.0.0.1:8080" : 1.0 ,
265+ "10.0.0.2:8080" : 1.0 ,
266+ "10.0.0.3:8080" : 1.0 ,
124267 },
125268 },
126269 }
127270
128271 for _ , tt := range testcases {
129272 t .Run (tt .name , func (t * testing.T ) {
130- ctx := context .Background ()
131- d , _ := os .Getwd ()
132- modelDir := filepath .Join (d , "/testdata" )
273+ ctx := t .Context ()
133274
134275 kvcacheConfig , err := kvcache .NewDefaultConfig ()
135- kvcacheConfig .TokenizersPoolConfig . WorkersCount = 1
136- //kvcacheConfig.TokenizersPoolConfig.LocalTokenizerConfig.AutoDiscoveryDir = modelDir
137- kvcacheConfig . TokenizersPoolConfig . LocalTokenizerConfig . ModelTokenizerMap = map [ string ] string {
138- "test-model" : filepath . Join ( modelDir , "test-model/tokenizer.json" ) ,
276+ kvcacheConfig .TokenizersPoolConfig = & tokenization. Config {
277+ WorkersCount : 1 ,
278+ MinPrefixOverlapRatio : 0.8 ,
279+ LocalTokenizerConfig : & localTokenizerConfig ,
139280 }
140- kvcacheConfig .TokenizersPoolConfig .HFTokenizerConfig .Enabled = false
141- kvcacheConfig .TokenizersPoolConfig .HFTokenizerConfig .TokenizersCacheDir = "./build/tokenizers"
142281 require .NoError (t , err )
143282
144283 prefixCacheScorer , err := scorer .New (ctx , scorer.PrecisePrefixCachePluginConfig {
@@ -148,15 +287,19 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
148287 require .NoError (t , err )
149288 require .NotNil (t , prefixCacheScorer )
150289
151- // prefill
152- _ = prefixCacheScorer .Score (ctx , nil , tt .request , tt .pods )
153-
154- // Populate the kvblock.Index with test data
155- if tt .kvBlockData != nil {
290+ // populate the kvblock.Index with test data
291+ if tt .kvBlockData != nil && tt .request != nil && tt .request .Body != nil {
156292 kvBlockIndex := prefixCacheScorer .KVBlockIndex ()
157- for key , entries := range tt .kvBlockData {
158- keys := []kvblock.Key {key }
159- err := kvBlockIndex .Add (ctx , keys , entries )
293+ var prompt string
294+ if tt .request .Body .Completions != nil {
295+ prompt = tt .request .Body .Completions .Prompt
296+ } else if tt .request .Body .ChatCompletions != nil {
297+ // ChatCompletions seem to be hanging right now
298+ t .Fatalf ("Not yet implemented" )
299+ }
300+ blockData := tt .kvBlockData (prompt , tt .request .TargetModel )
301+ for key , entries := range blockData {
302+ err := kvBlockIndex .Add (ctx , []kvblock.Key {key }, entries )
160303 require .NoError (t , err )
161304 }
162305 }
0 commit comments