Skip to content

Commit 91283b1

Browse files
mvanhorn and claude committed
feat(embedding): surface non-symmetric embedding config for VikingDB provider
VikingDB embedders accepted is_query but ignored it. Now VikingDBDenseEmbedder and VikingDBHybridEmbedder accept query_param/document_param and pass input_type to the API when non-symmetric mode is configured.

- Add query_param/document_param to VikingDB Dense and Hybrid constructors
- Add _resolve_input_type() to select query vs document param
- Pass input_type in _call_api data items when set
- Wire factory entries to pass config params through
- Sparse embedder unchanged (sparse models are symmetric)

Closes #655

Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
1 parent a092d64 commit 91283b1

File tree

3 files changed

+106
-4
lines changed

3 files changed

+106
-4
lines changed

openviking/models/embedder/vikingdb_embedders.py

Lines changed: 42 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,15 @@ def _call_api(
4040
texts: List[str],
4141
dense_model: Dict[str, Any] = None,
4242
sparse_model: Optional[Dict[str, Any]] = None,
43+
input_type: Optional[str] = None,
4344
) -> List[Dict[str, Any]]:
4445
"""Call VikingDB Embedding API"""
4546
path = "/api/vikingdb/embedding"
4647

4748
data_items = [{"text": text} for text in texts]
49+
if input_type is not None:
50+
for item in data_items:
51+
item["input_type"] = input_type
4852

4953
req_body = {"data": data_items}
5054
if dense_model:
@@ -116,17 +120,30 @@ def __init__(
116120
dimension: Optional[int] = None,
117121
embedding_type: str = "text",
118122
config: Optional[Dict[str, Any]] = None,
123+
query_param: Optional[str] = None,
124+
document_param: Optional[str] = None,
119125
):
120126
DenseEmbedderBase.__init__(self, model_name, config)
121127
self._init_vikingdb_client(ak, sk, region, host)
122128
self.model_version = model_version
123129
self.dimension = dimension
124130
self.embedding_type = embedding_type
125131
self.dense_model = {"name": model_name, "version": model_version, "dim": dimension}
132+
self.query_param = query_param
133+
self.document_param = document_param
134+
135+
def _resolve_input_type(self, is_query: bool) -> Optional[str]:
    """Pick the ``input_type`` value to send with an embedding request.

    Args:
        is_query: True when the text being embedded is a search query,
            False when it is a document.

    Returns:
        ``self.query_param`` for queries or ``self.document_param`` for
        documents. ``None`` when the relevant param is unset or blank,
        which means no ``input_type`` is attached (symmetric mode).
    """
    param = self.query_param if is_query else self.document_param
    # An empty string is treated as "not configured": the embedder factory
    # only forwards truthy query_param/document_param values, so direct
    # construction with "" should not send a blank input_type to the API.
    return param or None
126142

127143
def embed(self, text: str, is_query: bool = False) -> EmbedResult:
144+
input_type = self._resolve_input_type(is_query)
128145
results = transient_retry(
129-
lambda: self._call_api([text], dense_model=self.dense_model),
146+
lambda: self._call_api([text], dense_model=self.dense_model, input_type=input_type),
130147
max_retries=self.max_retries,
131148
)
132149
if not results:
@@ -142,8 +159,9 @@ def embed(self, text: str, is_query: bool = False) -> EmbedResult:
142159
def embed_batch(self, texts: List[str], is_query: bool = False) -> List[EmbedResult]:
143160
if not texts:
144161
return []
162+
input_type = self._resolve_input_type(is_query)
145163
raw_results = transient_retry(
146-
lambda: self._call_api(texts, dense_model=self.dense_model),
164+
lambda: self._call_api(texts, dense_model=self.dense_model, input_type=input_type),
147165
max_retries=self.max_retries,
148166
)
149167
return [
@@ -224,6 +242,8 @@ def __init__(
224242
dimension: Optional[int] = None,
225243
embedding_type: str = "text",
226244
config: Optional[Dict[str, Any]] = None,
245+
query_param: Optional[str] = None,
246+
document_param: Optional[str] = None,
227247
):
228248
HybridEmbedderBase.__init__(self, model_name, config)
229249
self._init_vikingdb_client(ak, sk, region, host)
@@ -235,11 +255,25 @@ def __init__(
235255
"name": model_name,
236256
"version": model_version,
237257
}
258+
self.query_param = query_param
259+
self.document_param = document_param
260+
261+
def _resolve_input_type(self, is_query: bool) -> Optional[str]:
    """Pick the ``input_type`` value to send with a hybrid embedding request.

    Args:
        is_query: True when the text being embedded is a search query,
            False when it is a document.

    Returns:
        ``self.query_param`` for queries or ``self.document_param`` for
        documents. ``None`` when the relevant param is unset or blank,
        which means no ``input_type`` is attached (symmetric mode).
    """
    param = self.query_param if is_query else self.document_param
    # An empty string is treated as "not configured": the embedder factory
    # only forwards truthy query_param/document_param values, so direct
    # construction with "" should not send a blank input_type to the API.
    return param or None
238268

239269
def embed(self, text: str, is_query: bool = False) -> EmbedResult:
270+
input_type = self._resolve_input_type(is_query)
240271
results = transient_retry(
241272
lambda: self._call_api(
242-
[text], dense_model=self.dense_model, sparse_model=self.sparse_model
273+
[text],
274+
dense_model=self.dense_model,
275+
sparse_model=self.sparse_model,
276+
input_type=input_type,
243277
),
244278
max_retries=self.max_retries,
245279
)
@@ -260,9 +294,13 @@ def embed(self, text: str, is_query: bool = False) -> EmbedResult:
260294
def embed_batch(self, texts: List[str], is_query: bool = False) -> List[EmbedResult]:
261295
if not texts:
262296
return []
297+
input_type = self._resolve_input_type(is_query)
263298
raw_results = transient_retry(
264299
lambda: self._call_api(
265-
texts, dense_model=self.dense_model, sparse_model=self.sparse_model
300+
texts,
301+
dense_model=self.dense_model,
302+
sparse_model=self.sparse_model,
303+
input_type=input_type,
266304
),
267305
max_retries=self.max_retries,
268306
)

openviking_cli/utils/config/embedding_config.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -399,6 +399,8 @@ def _create_embedder(
399399
"host": cfg.host,
400400
"dimension": cfg.dimension,
401401
"input_type": cfg.input,
402+
**({"query_param": cfg.query_param} if cfg.query_param else {}),
403+
**({"document_param": cfg.document_param} if cfg.document_param else {}),
402404
},
403405
),
404406
("vikingdb", "sparse"): (
@@ -423,6 +425,8 @@ def _create_embedder(
423425
"host": cfg.host,
424426
"dimension": cfg.dimension,
425427
"input_type": cfg.input,
428+
**({"query_param": cfg.query_param} if cfg.query_param else {}),
429+
**({"document_param": cfg.document_param} if cfg.document_param else {}),
426430
},
427431
),
428432
("jina", "dense"): (
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright (c) 2026 Beijing Volcano Engine Technology Co., Ltd.
2+
# SPDX-License-Identifier: AGPL-3.0
3+
"""Tests for VikingDB non-symmetric embedding support."""
4+
5+
from unittest.mock import patch
6+
7+
import pytest
8+
9+
from openviking.models.embedder.vikingdb_embedders import (
10+
VikingDBDenseEmbedder,
11+
VikingDBHybridEmbedder,
12+
)
13+
14+
15+
@pytest.fixture
def mock_vikingdb_client():
    """Stub out ``VikingDBDenseEmbedder._init_vikingdb_client`` for one test.

    Yields:
        The mock that replaces ``_init_vikingdb_client`` while the fixture
        is active; every call to it returns ``None``.
    """
    # return_value=None already makes the stub a no-op, so no extra
    # side_effect is needed. Only the dense embedder class is patched here.
    with patch.object(
        VikingDBDenseEmbedder, "_init_vikingdb_client", return_value=None
    ) as mock_init:
        yield mock_init
23+
24+
25+
def test_dense_resolve_input_type_symmetric():
    """With both params unset, neither side resolves to an input_type."""
    # __new__ bypasses __init__, so only the attributes set below exist.
    dense = VikingDBDenseEmbedder.__new__(VikingDBDenseEmbedder)
    dense.query_param = dense.document_param = None
    for is_query in (True, False):
        assert dense._resolve_input_type(is_query=is_query) is None
32+
33+
34+
def test_dense_resolve_input_type_nonsymmetric():
    """Configured params are returned for the matching is_query side."""
    # __new__ bypasses __init__, so only the attributes set below exist.
    dense = VikingDBDenseEmbedder.__new__(VikingDBDenseEmbedder)
    dense.query_param, dense.document_param = "query", "passage"

    query_side = dense._resolve_input_type(is_query=True)
    document_side = dense._resolve_input_type(is_query=False)

    assert query_side == "query"
    assert document_side == "passage"
41+
42+
43+
def test_hybrid_resolve_input_type_nonsymmetric():
    """The hybrid embedder resolves query and document params the same way."""
    # __new__ bypasses __init__, so only the attributes set below exist.
    hybrid = VikingDBHybridEmbedder.__new__(VikingDBHybridEmbedder)
    hybrid.query_param, hybrid.document_param = "search_query", "search_document"

    expected_by_side = {True: "search_query", False: "search_document"}
    for is_query, expected in expected_by_side.items():
        assert hybrid._resolve_input_type(is_query=is_query) == expected
50+
51+
52+
def test_dense_backward_compat_no_params():
    """A dense embedder with no query/document params resolves to None."""
    # __new__ bypasses __init__; the attributes are filled in by hand.
    legacy = VikingDBDenseEmbedder.__new__(VikingDBDenseEmbedder)
    legacy.query_param = None
    legacy.document_param = None
    legacy.model_name = "test"
    legacy.dimension = 1024

    # Must complete without raising and report "no input_type".
    resolved = legacy._resolve_input_type(is_query=True)
    assert resolved is None

0 commit comments

Comments
 (0)