add more test cases

evacchi · evacchi · commit 6ff41cdcdacd · 2025-12-05T17:04:48.000+01:00
Signed-off-by: Edoardo Vacchi &lt;evacchi@users.noreply.github.com&gt;
diff --git a/pkg/plugins/scorer/precise_prefix_cache_test.go b/pkg/plugins/scorer/precise_prefix_cache_test.go
@@ -1,7 +1,6 @@
 package scorer_test
 
 import (
-	"context"
 	"os"
 	"path/filepath"
 	"testing"
@@ -11,6 +10,7 @@ import (
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache"
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvblock"
 	"github.com/llm-d/llm-d-kv-cache-manager/pkg/kvcache/kvevents"
+	"github.com/llm-d/llm-d-kv-cache-manager/pkg/tokenization"
 	"github.com/stretchr/testify/require"
 	k8stypes "k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
@@ -19,12 +19,21 @@ import (
 )
 
 func TestPrefixCacheTracking_Score(t *testing.T) {
+	d, err := os.Getwd()
+	require.NoError(t, err)
+	modelDir := filepath.Join(d, "testdata")
+	localTokenizerConfig := tokenization.LocalTokenizerConfig{
+		ModelTokenizerMap: map[string]string{
+			"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
+		},
+	}
+
 	testcases := []struct {
 		name                string
 		pods                []types.Pod
 		request             *types.LLMRequest
-		kvBlockData         map[kvblock.Key][]kvblock.PodEntry // KV-blocks to populate in the index
-		wantScoresByAddress map[string]float64                 // Use address as key instead of Pod objects
+		kvBlockData         func(prompt string, model string) map[kvblock.Key][]kvblock.PodEntry
+		wantScoresByAddress map[string]float64
 	}{
 		{
 			name: "nil request",
@@ -36,9 +45,7 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 					},
 				},
 			},
-			request:             nil,
-			kvBlockData:         nil,
-			wantScoresByAddress: make(map[string]float64), // empty map instead of nil
+			wantScoresByAddress: map[string]float64{}, // empty map
 		},
 		{
 			name: "empty request body",
@@ -55,11 +62,10 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				TargetModel: "test-model",
 				Body:        nil,
 			},
-			kvBlockData:         nil,                      // no kv-blocks in index
-			wantScoresByAddress: make(map[string]float64), // empty map instead of nil
+			wantScoresByAddress: map[string]float64{}, // empty map
 		},
 		{
-			name: "test normalized scores with different kv-block hits",
+			name: "longest prefix scorer (default scorer)",
 			pods: []types.Pod{
 				&types.PodMetrics{
 					Pod: &backend.Pod{
@@ -94,51 +100,184 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 				TargetModel: "test-model",
 				Body: &types.LLMRequestBody{
 					Completions: &types.CompletionsRequest{
-						Prompt: "hello world",
+						Prompt: "Testing prefix cache with multiple blocks of tokens. " +
+							"First block should be cached across all pods in the test. " +
+							"Second block will be cached on a subset of pods only. " +
+							"Third block exists only on the pod with the longest prefix match.",
 					},
 				},
 			},
-			// Populate kvblock.Index with blocks such that:
-			// - block1 exists on pod-a only (10 hits for pod-a)
-			// - block2 exists on pod-a and pod-b (10 more hits for pod-a, 10 for pod-b)
-			// - block3 exists on all pods (10 more hits each)
-			// Total: pod-a=30, pod-b=20, pod-c=10 -> normalized to 1.0, 0.5, 0.0
-			kvBlockData: map[kvblock.Key][]kvblock.PodEntry{
-				{ModelName: "test-model", ChunkHash: 1}: {
-					{PodIdentifier: "10.0.0.1:8080"},
+			kvBlockData: func(prompt, model string) map[kvblock.Key][]kvblock.PodEntry {
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(tokenization.LocalTokenizerConfig{
+					ModelTokenizerMap: map[string]string{
+						"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
+					},
+				})
+				require.NoError(t, err)
+
+				// use the actual tokenizer on the test prompt
+				tokens, _, err := testTokenizer.Encode(prompt, model)
+				require.NoError(t, err)
+
+				// compute chunk hashes using the default block size
+				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+
+				require.GreaterOrEqual(t, len(chunkKeys), 3, "Need at least 3 chunks for test")
+
+				// populate kvblock.Index to test longest prefix matching:
+				// - chunk0 (first chunk): all pods have it (common prefix start)
+				// - chunk1: pod-a and pod-b have it (pod-c drops off after chunk0)
+				// - chunk2: only pod-a has it (pod-b drops off after chunk1)
+				// LongestPrefixScorer uses intersection, so:
+				//   pod-a: 3 chunks (0,1,2) -> score 3
+				//   pod-b: 2 chunks (0,1) -> score 2
+				//   pod-c: 1 chunk (0) -> score 1
+				// Normalized: (3-1)/(3-1) = 1.0, (2-1)/(3-1) = 0.5, (1-1)/(3-1) = 0.0
+
+				return map[kvblock.Key][]kvblock.PodEntry{
+					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+						{PodIdentifier: "10.0.0.1:8080"},
+						{PodIdentifier: "10.0.0.2:8080"},
+						{PodIdentifier: "10.0.0.3:8080"},
+					},
+					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+						{PodIdentifier: "10.0.0.1:8080"},
+						{PodIdentifier: "10.0.0.2:8080"},
+					},
+					{ModelName: model, ChunkHash: chunkKeys[2].ChunkHash}: {
+						{PodIdentifier: "10.0.0.1:8080"},
+					},
+				}
+			},
+			wantScoresByAddress: map[string]float64{
+				"10.0.0.1:8080": 1.0, // 3 chunks -> (3-1)/(3-1) = 1.0
+				"10.0.0.2:8080": 0.5, // 2 chunks -> (2-1)/(3-1) = 0.5
+				"10.0.0.3:8080": 0.0, // 1 chunk -> (1-1)/(3-1) = 0.0
+			},
+		},
+		{
+			name: "no cache hits (empty index)",
+			pods: []types.Pod{
+				&types.PodMetrics{
+					Pod: &backend.Pod{
+						NamespacedName: k8stypes.NamespacedName{Name: "pod-a"},
+						Address:        "10.0.0.1:8080",
+					},
 				},
-				{ModelName: "test-model", ChunkHash: 2}: {
-					{PodIdentifier: "10.0.0.1:8080"},
-					{PodIdentifier: "10.0.0.2:8080"},
+				&types.PodMetrics{
+					Pod: &backend.Pod{
+						NamespacedName: k8stypes.NamespacedName{Name: "pod-b"},
+						Address:        "10.0.0.2:8080",
+					},
 				},
-				{ModelName: "test-model", ChunkHash: 3}: {
-					{PodIdentifier: "10.0.0.1:8080"},
-					{PodIdentifier: "10.0.0.2:8080"},
-					{PodIdentifier: "10.0.0.3:8080"},
+				&types.PodMetrics{
+					Pod: &backend.Pod{
+						NamespacedName: k8stypes.NamespacedName{Name: "pod-c"},
+						Address:        "10.0.0.3:8080",
+					},
+				},
+			},
+			request: &types.LLMRequest{
+				RequestId:   "test-request-3",
+				TargetModel: "test-model",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "This prompt has never been cached before on any pod.",
+					},
 				},
 			},
+			kvBlockData: nil, // no cached data
 			wantScoresByAddress: map[string]float64{
-				"10.0.0.1:8080": 1.0, // 30 hits -> (30-10)/(30-10) = 1.0
-				"10.0.0.2:8080": 0.5, // 20 hits -> (20-10)/(30-10) = 0.5
-				"10.0.0.3:8080": 0.0, // 10 hits -> (10-10)/(30-10) = 0.0
+				// when no pods have any cache hits, all should get equal scores (0.0)
+				"10.0.0.1:8080": 0.0,
+				"10.0.0.2:8080": 0.0,
+				"10.0.0.3:8080": 0.0,
+			},
+		},
+		{
+			name: "all pods have equal prefix length",
+			pods: []types.Pod{
+				&types.PodMetrics{
+					Pod: &backend.Pod{
+						NamespacedName: k8stypes.NamespacedName{Name: "pod-a"},
+						Address:        "10.0.0.1:8080",
+					},
+				},
+				&types.PodMetrics{
+					Pod: &backend.Pod{
+						NamespacedName: k8stypes.NamespacedName{Name: "pod-b"},
+						Address:        "10.0.0.2:8080",
+					},
+				},
+				&types.PodMetrics{
+					Pod: &backend.Pod{
+						NamespacedName: k8stypes.NamespacedName{Name: "pod-c"},
+						Address:        "10.0.0.3:8080",
+					},
+				},
+			},
+			request: &types.LLMRequest{
+				RequestId:   "test-request-4",
+				TargetModel: "test-model",
+				Body: &types.LLMRequestBody{
+					Completions: &types.CompletionsRequest{
+						Prompt: "All pods have the same cached prefix for this particular prompt text. " +
+							"We need to ensure this prompt is long enough to generate at least two token chunks. " +
+							"This additional text should provide sufficient tokens to meet the minimum requirement.",
+					},
+				},
+			},
+			kvBlockData: func(prompt, model string) map[kvblock.Key][]kvblock.PodEntry {
+				testTokenizer, err := tokenization.NewCachedLocalTokenizer(tokenization.LocalTokenizerConfig{
+					ModelTokenizerMap: map[string]string{
+						"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
+					},
+				})
+				require.NoError(t, err)
+
+				tokens, _, err := testTokenizer.Encode(prompt, model)
+				require.NoError(t, err)
+
+				tokenProcessor := kvblock.NewChunkedTokenDatabase(kvblock.DefaultTokenProcessorConfig())
+				chunkKeys := tokenProcessor.TokensToKVBlockKeys(tokens, model)
+
+				require.GreaterOrEqual(t, len(chunkKeys), 2, "Need at least 2 chunks for test")
+
+				// all pods have the same 2 chunks cached
+				return map[kvblock.Key][]kvblock.PodEntry{
+					{ModelName: model, ChunkHash: chunkKeys[0].ChunkHash}: {
+						{PodIdentifier: "10.0.0.1:8080"},
+						{PodIdentifier: "10.0.0.2:8080"},
+						{PodIdentifier: "10.0.0.3:8080"},
+					},
+					{ModelName: model, ChunkHash: chunkKeys[1].ChunkHash}: {
+						{PodIdentifier: "10.0.0.1:8080"},
+						{PodIdentifier: "10.0.0.2:8080"},
+						{PodIdentifier: "10.0.0.3:8080"},
+					},
+				}
+			},
+			wantScoresByAddress: map[string]float64{
+				// when all pods have equal cache (minScore == maxScore), the implementation
+				// returns 1.0 for all pods to avoid division by zero
+				"10.0.0.1:8080": 1.0,
+				"10.0.0.2:8080": 1.0,
+				"10.0.0.3:8080": 1.0,
 			},
 		},
 	}
 
 	for _, tt := range testcases {
 		t.Run(tt.name, func(t *testing.T) {
-			ctx := context.Background()
-			d, _ := os.Getwd()
-			modelDir := filepath.Join(d, "/testdata")
+			ctx := t.Context()
 
 			kvcacheConfig, err := kvcache.NewDefaultConfig()
-			kvcacheConfig.TokenizersPoolConfig.WorkersCount = 1
-			//kvcacheConfig.TokenizersPoolConfig.LocalTokenizerConfig.AutoDiscoveryDir = modelDir
-			kvcacheConfig.TokenizersPoolConfig.LocalTokenizerConfig.ModelTokenizerMap = map[string]string{
-				"test-model": filepath.Join(modelDir, "test-model/tokenizer.json"),
+			kvcacheConfig.TokenizersPoolConfig = &tokenization.Config{
+				WorkersCount:          1,
+				MinPrefixOverlapRatio: 0.8,
+				LocalTokenizerConfig:  &localTokenizerConfig,
 			}
-			kvcacheConfig.TokenizersPoolConfig.HFTokenizerConfig.Enabled = false
-			kvcacheConfig.TokenizersPoolConfig.HFTokenizerConfig.TokenizersCacheDir = "./build/tokenizers"
 			require.NoError(t, err)
 
 			prefixCacheScorer, err := scorer.New(ctx, scorer.PrecisePrefixCachePluginConfig{
@@ -148,15 +287,19 @@ func TestPrefixCacheTracking_Score(t *testing.T) {
 			require.NoError(t, err)
 			require.NotNil(t, prefixCacheScorer)
 
-			// prefill
-			_ = prefixCacheScorer.Score(ctx, nil, tt.request, tt.pods)
-
-			// Populate the kvblock.Index with test data
-			if tt.kvBlockData != nil {
+			// populate the kvblock.Index with test data
+			if tt.kvBlockData != nil && tt.request != nil && tt.request.Body != nil {
 				kvBlockIndex := prefixCacheScorer.KVBlockIndex()
-				for key, entries := range tt.kvBlockData {
-					keys := []kvblock.Key{key}
-					err := kvBlockIndex.Add(ctx, keys, entries)
+				var prompt string
+				if tt.request.Body.Completions != nil {
+					prompt = tt.request.Body.Completions.Prompt
+				} else if tt.request.Body.ChatCompletions != nil {
+					// ChatCompletions seem to be hanging right now
+					t.Fatalf("Not yet implemented")
+				}
+				blockData := tt.kvBlockData(prompt, tt.request.TargetModel)
+				for key, entries := range blockData {
+					err := kvBlockIndex.Add(ctx, []kvblock.Key{key}, entries)
 					require.NoError(t, err)
 				}
 			}