Skip to content

Commit c6c9c64

Browse files
authored
feat: Allow None metadata filter by using IS_EMPTY operator (run-llama#15167)
1 parent b4776ae commit c6c9c64

File tree

4 files changed

+48
-13
lines changed
  • llama-index-core
  • llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant

4 files changed

+48
-13
lines changed

llama-index-core/llama_index/core/vector_stores/simple.py

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -91,12 +91,19 @@ def _process_filter_match(
9191
filter_matches_list = []
9292
for filter_ in filter_list:
9393
filter_matches = True
94-
95-
filter_matches = _process_filter_match(
96-
operator=filter_.operator,
97-
value=filter_.value,
98-
metadata_value=metadata.get(filter_.key, None),
99-
)
94+
metadata_value = metadata.get(filter_.key, None)
95+
if filter_.operator == FilterOperator.IS_EMPTY:
96+
filter_matches = (
97+
metadata_value is None
98+
or metadata_value == ""
99+
or metadata_value == []
100+
)
101+
else:
102+
filter_matches = _process_filter_match(
103+
operator=filter_.operator,
104+
value=filter_.value,
105+
metadata_value=metadata_value,
106+
)
100107

101108
filter_matches_list.append(filter_matches)
102109

llama-index-core/llama_index/core/vector_stores/types.py

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,7 @@ class FilterOperator(str, Enum):
7474
ALL = "all" # Contains all (array of strings)
7575
TEXT_MATCH = "text_match" # full text match (allows you to search for a specific substring, token or phrase within the text field)
7676
CONTAINS = "contains" # metadata array contains value (string or number)
77+
IS_EMPTY = "is_empty" # the field does not exist or is empty (null or empty array)
7778

7879

7980
class FilterCondition(str, Enum):
@@ -94,13 +95,15 @@ class MetadataFilter(BaseModel):
9495
"""
9596

9697
key: str
97-
value: Union[
98-
StrictInt,
99-
StrictFloat,
100-
StrictStr,
101-
List[StrictStr],
102-
List[StrictFloat],
103-
List[StrictInt],
98+
value: Optional[
99+
Union[
100+
StrictInt,
101+
StrictFloat,
102+
StrictStr,
103+
List[StrictStr],
104+
List[StrictFloat],
105+
List[StrictInt],
106+
]
104107
]
105108
operator: FilterOperator = FilterOperator.EQ
106109

llama-index-core/tests/vector_stores/test_simple.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,23 @@ def test_query_with_all_filter_returns_matches(self) -> None:
400400
assert result.ids is not None
401401
self.assertEqual(len(result.ids), 2)
402402

403+
def test_query_with_is_empty_filter_returns_matches(self) -> None:
404+
simple_vector_store = SimpleVectorStore()
405+
simple_vector_store.add(_node_embeddings_for_test())
406+
407+
filters = MetadataFilters(
408+
filters=[
409+
MetadataFilter(
410+
key="not_existed_key", operator=FilterOperator.IS_EMPTY, value=None
411+
)
412+
]
413+
)
414+
query = VectorStoreQuery(
415+
query_embedding=[1.0, 1.0], filters=filters, similarity_top_k=3
416+
)
417+
result = simple_vector_store.query(query)
418+
self.assertEqual(len(result.ids), len(_node_embeddings_for_test()))
419+
403420
def test_clear(self) -> None:
404421
simple_vector_store = SimpleVectorStore()
405422
simple_vector_store.add(_node_embeddings_for_test())

llama-index-integrations/vector_stores/llama-index-vector-stores-qdrant/llama_index/vector_stores/qdrant/base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,10 @@
4444
MatchText,
4545
MatchValue,
4646
Payload,
47+
PayloadField,
4748
Range,
4849
HasIdCondition,
50+
IsEmptyCondition,
4951
)
5052

5153
logger = logging.getLogger(__name__)
@@ -1095,6 +1097,12 @@ def _build_subfilter(self, filters: MetadataFilters) -> Filter:
10951097
match=MatchExcept(**{"except": values}),
10961098
)
10971099
)
1100+
elif subfilter.operator == FilterOperator.IS_EMPTY:
1101+
# This condition will match all records where the field either does not exist, or has a null or [] value.
1102+
# https://qdrant.tech/documentation/concepts/filtering/#is-empty
1103+
conditions.append(
1104+
IsEmptyCondition(is_empty=PayloadField(key=subfilter.key))
1105+
)
10981106

10991107
filter = Filter()
11001108
if filters.condition == FilterCondition.AND:

0 commit comments

Comments
 (0)