Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@
content_from_data_and_mime_type,
make_overlapped_chunks,
)
from llama_stack.providers.utils.vector_io.vector_utils import (
sanitize_metadata_for_attributes,
)
from llama_stack_api import (
Chunk,
Files,
Expand Down Expand Up @@ -635,7 +638,7 @@ async def openai_search_vector_store(
file_id=chunk.metadata.get("document_id", ""),
filename=chunk.metadata.get("filename", ""),
score=score,
attributes=chunk.metadata,
attributes=sanitize_metadata_for_attributes(chunk.metadata),
content=content,
)
data.append(response_data_item)
Expand Down
23 changes: 23 additions & 0 deletions src/llama_stack/providers/utils/vector_io/vector_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import hashlib
import re
import uuid
from typing import Any


def generate_chunk_id(document_id: str, chunk_text: str, chunk_window: str | None = None) -> str:
Expand Down Expand Up @@ -37,6 +38,28 @@ def sanitize_collection_name(name: str, weaviate_format=False) -> str:
return s


def sanitize_metadata_for_attributes(metadata: dict[str, Any]) -> dict[str, str | float | bool]:
"""
Filter metadata to primitives for VectorStoreSearchResponse.attributes compatibility.

Converts dict[str, Any] to dict[str, str | float | bool]:
- Preserves: str, bool
- Converts: int/float -> float, list -> comma-separated string
- Filters: dict, None, other types
"""
sanitized: dict[str, str | float | bool] = {}
for key, value in metadata.items():
if isinstance(value, bool):
sanitized[key] = value
elif isinstance(value, int | float):
sanitized[key] = float(value)
elif isinstance(value, str):
sanitized[key] = value
elif isinstance(value, list):
sanitized[key] = ", ".join(str(item) for item in value)
return sanitized


class WeightedInMemoryAggregator:
@staticmethod
def _normalize_scores(scores: dict[str, float]) -> dict[str, float]:
Expand Down
29 changes: 28 additions & 1 deletion tests/unit/providers/vector_io/test_vector_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@
# This source code is licensed under the terms described in the LICENSE file in
# the root directory of this source tree.

from llama_stack.providers.utils.vector_io.vector_utils import generate_chunk_id
from llama_stack.providers.utils.vector_io.vector_utils import (
generate_chunk_id,
sanitize_metadata_for_attributes,
)
from llama_stack_api import Chunk, ChunkMetadata

# This test is a unit test for the chunk_utils.py helpers. This should only contain
Expand Down Expand Up @@ -78,3 +81,27 @@ def test_chunk_serialization():
serialized_chunk = chunk.model_dump()
assert serialized_chunk["chunk_id"] == "test-chunk-id"
assert "chunk_id" in serialized_chunk


def test_sanitize_metadata_for_attributes():
    """Check that sanitize_metadata_for_attributes keeps primitives, flattens lists and drops dicts."""
    # One entry per input kind the sanitizer distinguishes.
    raw_metadata = dict(
        tags=["transformers", "h100-compatible", "region:us"],
        model_name="granite-3.3-8b",
        score=0.95,
        active=True,
        count=42,
        nested={"key": "value"},  # expected to be dropped
    )

    sanitized = sanitize_metadata_for_attributes(raw_metadata)

    # A list is flattened into a single comma-separated string.
    assert sanitized["tags"] == "transformers, h100-compatible, region:us"

    # Strings and floats come through unchanged.
    assert sanitized["model_name"] == "granite-3.3-8b"
    assert sanitized["score"] == 0.95

    # Booleans stay booleans rather than being coerced to numbers.
    assert sanitized["active"] is True

    # Integers are widened to float.
    assert sanitized["count"] == 42.0

    # Nested dicts are not representable as an attribute value and are removed.
    assert "nested" not in sanitized
Loading