Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 22 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,26 +39,27 @@ pip install 'vectordb-bench[pinecone]'
```
All supported database clients

| Optional database client | install command |
|--------------------------|---------------------------------------------|
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
| all (*clients requirements might be conflict with each other*) | `pip install 'vectordb-bench[all]'` |
| qdrant | `pip install 'vectordb-bench[qdrant]'` |
| pinecone | `pip install 'vectordb-bench[pinecone]'` |
| weaviate | `pip install 'vectordb-bench[weaviate]'` |
| elastic, aliyun_elasticsearch| `pip install 'vectordb-bench[elastic]'` |
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install 'vectordb-bench[pgvector]'` |
| pgvecto.rs | `pip install 'vectordb-bench[pgvecto_rs]'` |
| redis | `pip install 'vectordb-bench[redis]'` |
| memorydb | `pip install 'vectordb-bench[memorydb]'` |
| chromadb | `pip install 'vectordb-bench[chromadb]'` |
| awsopensearch | `pip install 'vectordb-bench[opensearch]'` |
| aliyun_opensearch | `pip install 'vectordb-bench[aliyun_opensearch]'` |
| mongodb | `pip install 'vectordb-bench[mongodb]'` |
| tidb | `pip install 'vectordb-bench[tidb]'` |
| vespa | `pip install 'vectordb-bench[vespa]'` |
| oceanbase | `pip install 'vectordb-bench[oceanbase]'` |
| hologres | `pip install 'vectordb-bench[hologres]'` |
| Optional database client | install command |
|----------------------------------------------------------------|---------------------------------------------------|
| pymilvus, zilliz_cloud (*default*) | `pip install vectordb-bench` |
| all (*client requirements might conflict with each other*) | `pip install 'vectordb-bench[all]'` |
| qdrant | `pip install 'vectordb-bench[qdrant]'` |
| pinecone | `pip install 'vectordb-bench[pinecone]'` |
| weaviate | `pip install 'vectordb-bench[weaviate]'` |
| elastic, aliyun_elasticsearch | `pip install 'vectordb-bench[elastic]'` |
| pgvector, pgvectorscale, pgdiskann, alloydb | `pip install 'vectordb-bench[pgvector]'` |
| pgvecto.rs | `pip install 'vectordb-bench[pgvecto_rs]'` |
| redis | `pip install 'vectordb-bench[redis]'` |
| memorydb | `pip install 'vectordb-bench[memorydb]'` |
| chromadb | `pip install 'vectordb-bench[chromadb]'` |
| awsopensearch | `pip install 'vectordb-bench[opensearch]'` |
| aliyun_opensearch | `pip install 'vectordb-bench[aliyun_opensearch]'` |
| mongodb | `pip install 'vectordb-bench[mongodb]'` |
| tidb | `pip install 'vectordb-bench[tidb]'` |
| cockroachdb | `pip install 'vectordb-bench[cockroachdb]'` |
| vespa | `pip install 'vectordb-bench[vespa]'` |
| oceanbase | `pip install 'vectordb-bench[oceanbase]'` |
| hologres | `pip install 'vectordb-bench[hologres]'` |

### Run

Expand Down Expand Up @@ -477,7 +478,7 @@ Now we can only run one task at the same time.
### Code Structure
![image](https://github.com/zilliztech/VectorDBBench/assets/105927039/8c06512e-5419-4381-b084-9c93aed59639)
### Client
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
Our client module is designed with flexibility and extensibility in mind, aiming to integrate APIs from different systems seamlessly. As of now, it supports Milvus, Zilliz Cloud, Elastic Search, Pinecone, Qdrant Cloud, Weaviate Cloud, PgVector, Redis, Chroma, CockroachDB, etc. Stay tuned for more options, as we are consistently working on extending our reach to other systems.
### Benchmark Cases
We've developed lots of comprehensive benchmark cases to test vector databases' various capabilities, each designed to give you a different piece of the puzzle. These cases are categorized into four main types:
#### Capacity Case
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ aliyun_opensearch = [ "alibabacloud_ha3engine_vector" ]
mongodb = [ "pymongo" ]
mariadb = [ "mariadb" ]
tidb = [ "PyMySQL" ]
cockroachdb = [ "psycopg[binary,pool]", "pgvector" ]
clickhouse = [ "clickhouse-connect" ]
vespa = [ "pyvespa" ]
lancedb = [ "lancedb" ]
Expand Down
128 changes: 128 additions & 0 deletions tests/test_cockroachdb.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
"""
Tests for CockroachDB vector database client.

Assumes CockroachDB is running on localhost:26257.

To start CockroachDB locally:
cockroach start-single-node --insecure --listen-addr=localhost:26257
"""

import logging

import numpy as np

from vectordb_bench.models import DB

log = logging.getLogger(__name__)


class TestCockroachDB:
    """Integration tests for the CockroachDB vector client.

    Both tests connect to ``localhost:26257`` as ``root`` on ``defaultdb``
    and recreate their table on every run (``drop_old=True``), so a
    CockroachDB node must already be listening there.
    """

    # Dimensionality of the generated vectors and number of rows inserted.
    _DIM = 128
    _COUNT = 1000

    @staticmethod
    def _connection_config(table_name: str) -> dict:
        """Return the connection settings for the local test instance."""
        # Note: sslmode=disable is handled in the client's connect_config options
        return {
            "host": "localhost",
            "port": 26257,
            "user_name": "root",
            "password": "",
            "db_name": "defaultdb",
            "table_name": table_name,
        }

    @classmethod
    def _new_client(cls, table_name: str):
        """Build a CockroachDB client bound to a freshly dropped ``table_name``."""
        assert DB.CockroachDB.value == "CockroachDB"

        client_cls = DB.CockroachDB.init_cls
        return client_cls(
            dim=cls._DIM,
            db_config=cls._connection_config(table_name),
            db_case_config=None,
            collection_name=table_name,
            drop_old=True,
        )

    @classmethod
    def _random_embeddings(cls) -> list[list[float]]:
        """Return ``_COUNT`` random vectors of length ``_DIM`` as plain lists."""
        # One vectorized draw instead of _COUNT * _DIM scalar calls.
        return np.random.random((cls._COUNT, cls._DIM)).tolist()

    @classmethod
    def _insert_all(cls, client, embeddings: list[list[float]]) -> None:
        """Insert ``embeddings`` with ids ``0.._COUNT-1`` and check the result.

        ``insert_embeddings`` is expected to return ``(inserted_count, error)``.
        """
        with client.init():
            res = client.insert_embeddings(embeddings=embeddings, metadata=list(range(cls._COUNT)))

        assert res[0] == cls._COUNT, f"Insert count mismatch: {res[0]} != {cls._COUNT}"
        assert res[1] is None, f"Insert failed with error: {res[1]}"

    def test_insert_and_search(self):
        """Test basic insert and search operations."""
        cockroachdb = self._new_client("test_cockroachdb")
        embeddings = self._random_embeddings()

        # Test insert
        self._insert_all(cockroachdb, embeddings)

        # Test search: querying with a stored vector must return its own id first.
        with cockroachdb.init():
            test_id = np.random.randint(self._COUNT)
            q = embeddings[test_id]

            res = cockroachdb.search_embedding(query=q, k=10)

        assert len(res) > 0, "Search returned no results"
        assert res[0] == int(test_id), f"Top result {res[0]} != query id {test_id}"

        log.info("CockroachDB insert and search test passed")

    def test_search_with_filter(self):
        """Test search with filters."""
        from vectordb_bench.backend.filter import IntFilter

        filter_value = 0.9

        cockroachdb = self._new_client("test_cockroachdb_filter")
        embeddings = self._random_embeddings()

        # Insert data
        self._insert_all(cockroachdb, embeddings)

        # Search with filter
        with cockroachdb.init():
            filter_id = int(self._COUNT * filter_value)
            # Pick the query from the ids the filter is expected to keep.
            test_id = np.random.randint(filter_id, self._COUNT)
            q = embeddings[test_id]

            filters = IntFilter(int_value=filter_id, filter_rate=0.9)
            cockroachdb.prepare_filter(filters)

            res = cockroachdb.search_embedding(query=q, k=10)

        assert len(res) > 0, "Filtered search returned no results"
        assert res[0] == int(test_id), f"Top result {res[0]} != query id {test_id}"

        # Verify all results are >= filter_id
        for result_id in res:
            assert int(result_id) >= filter_id, f"Result {result_id} < filter threshold {filter_id}"

        log.info("CockroachDB filter test passed")
16 changes: 16 additions & 0 deletions vectordb_bench/backend/clients/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ class DB(Enum):
AliyunOpenSearch = "AliyunOpenSearch"
MongoDB = "MongoDB"
TiDB = "TiDB"
CockroachDB = "CockroachDB"
Clickhouse = "Clickhouse"
Vespa = "Vespa"
LanceDB = "LanceDB"
Expand Down Expand Up @@ -175,6 +176,11 @@ def init_cls(self) -> type[VectorDB]: # noqa: PLR0911, PLR0912, C901, PLR0915

return TiDB

if self == DB.CockroachDB:
from .cockroachdb.cockroachdb import CockroachDB

return CockroachDB

if self == DB.Test:
from .test.test import Test

Expand Down Expand Up @@ -326,6 +332,11 @@ def config_cls(self) -> type[DBConfig]: # noqa: PLR0911, PLR0912, C901, PLR0915

return TiDBConfig

if self == DB.CockroachDB:
from .cockroachdb.config import CockroachDBConfig

return CockroachDBConfig

if self == DB.Test:
from .test.config import TestConfig

Expand Down Expand Up @@ -458,6 +469,11 @@ def case_config_cls( # noqa: C901, PLR0911, PLR0912

return TiDBIndexConfig

if self == DB.CockroachDB:
from .cockroachdb.config import _cockroachdb_case_config

return _cockroachdb_case_config.get(index_type)

if self == DB.Vespa:
from .vespa.config import VespaHNSWConfig

Expand Down
110 changes: 110 additions & 0 deletions vectordb_bench/backend/clients/cockroachdb/cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""CLI parameter definitions for CockroachDB."""

from typing import Annotated, Unpack

import click
from pydantic import SecretStr

from vectordb_bench.backend.clients import DB

from ....cli.cli import (
CommonTypedDict,
cli,
click_parameter_decorators_from_typed_dict,
get_custom_case_config,
run,
)


class CockroachDBTypedDict(CommonTypedDict):
    """CLI parameters for the CockroachDB benchmark command.

    Every key pairs its value type with the ``click.option`` that supplies
    it on the command line. Keys are declared in the order the options
    should be registered.
    """

    # --- Connection settings -------------------------------------------
    user_name: Annotated[
        str,
        click.option(
            "--user-name",
            type=str,
            default="root",
            show_default=True,
            help="CockroachDB username",
        ),
    ]
    # The (empty) default password is deliberately not echoed in --help.
    password: Annotated[
        str,
        click.option(
            "--password",
            type=str,
            default="",
            show_default=False,
            help="CockroachDB password",
        ),
    ]
    host: Annotated[
        str,
        click.option(
            "--host",
            type=str,
            required=True,
            help="CockroachDB host",
        ),
    ]
    port: Annotated[
        int,
        click.option(
            "--port",
            type=int,
            default=26257,
            show_default=True,
            help="CockroachDB port",
        ),
    ]
    db_name: Annotated[
        str,
        click.option(
            "--db-name",
            type=str,
            required=True,
            help="Database name",
        ),
    ]

    # --- Vector index tuning -------------------------------------------
    min_partition_size: Annotated[
        int | None,
        click.option(
            "--min-partition-size",
            type=int,
            default=16,
            show_default=True,
            help="Minimum vectors per partition (default: 16, range: 1-1024)",
        ),
    ]
    max_partition_size: Annotated[
        int | None,
        click.option(
            "--max-partition-size",
            type=int,
            default=128,
            show_default=True,
            help="Maximum vectors per partition (default: 128, range: 4x min-4096)",
        ),
    ]
    vector_search_beam_size: Annotated[
        int | None,
        click.option(
            "--vector-search-beam-size",
            type=int,
            default=32,
            show_default=True,
            help="Partitions explored during search (default: 32)",
        ),
    ]


@cli.command()
@click_parameter_decorators_from_typed_dict(CockroachDBTypedDict)
def CockroachDB(
    **parameters: Unpack[CockroachDBTypedDict],
):
    """Run CockroachDB vector benchmark."""
    # Imports stay local to the command, as in the original layout.
    from .config import CockroachDBConfig, CockroachDBVectorIndexConfig

    parameters["custom_case"] = get_custom_case_config(parameters)

    from vectordb_bench.backend.clients.api import MetricType

    # Resolve the distance metric: COSINE when none was given, and coerce
    # plain strings into the MetricType enum.
    requested_metric = parameters.get("metric_type")
    if requested_metric is None:
        resolved_metric = MetricType.COSINE
    elif isinstance(requested_metric, str):
        resolved_metric = MetricType(requested_metric)
    else:
        resolved_metric = requested_metric

    # Connection settings; an empty password is passed on as None.
    connection = CockroachDBConfig(
        db_label=parameters["db_label"],
        user_name=SecretStr(parameters["user_name"]),
        password=SecretStr(parameters["password"]) if parameters["password"] else None,
        host=parameters["host"],
        port=parameters["port"],
        db_name=parameters["db_name"],
    )

    # Index tuning, falling back to the same values the CLI options default to.
    index_settings = CockroachDBVectorIndexConfig(
        metric_type=resolved_metric,
        min_partition_size=parameters.get("min_partition_size", 16),
        max_partition_size=parameters.get("max_partition_size", 128),
        vector_search_beam_size=parameters.get("vector_search_beam_size", 32),
    )

    run(
        db=DB.CockroachDB,
        db_config=connection,
        db_case_config=index_settings,
        **parameters,
    )
Loading