Skip to content

Commit

Permalink
fix: Failing test (oversample in search)
Browse files (browse the repository at this point in the history)
  • Loading branch information
undo76 committed Dec 10, 2024
1 parent e60009f commit 4f93902
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions src/raglite/_search.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -33,7 +33,7 @@ def search(query: str, *, config: RAGLiteConfig | None = None) -> tuple[list[Chu


def vector_search(
- query: str | FloatMatrix, *, config: RAGLiteConfig | None = None
+ query: str | FloatMatrix, *, oversample: int = 8, config: RAGLiteConfig | None = None
) -> tuple[list[ChunkId], list[float]]:
"""Search chunks using ANN vector search."""
# Read the config.
Expand Down Expand Up @@ -66,7 +66,7 @@ def vector_search(
results = session.exec(
select(ChunkEmbedding.chunk_id, distance)
.order_by(distance)
- .limit(config.num_chunks)
+ .limit(config.num_chunks * oversample)
)
chunk_ids_, distance = zip(*results, strict=True)
chunk_ids, similarity = np.asarray(chunk_ids_), 1.0 - np.asarray(distance)
Expand All @@ -79,7 +79,7 @@ def vector_search(
from pynndescent import NNDescent

multi_vector_indices, distance = cast(NNDescent, index).query(
- query_embedding[np.newaxis, :], k=config.num_chunks
+ query_embedding[np.newaxis, :], k=config.num_chunks * oversample
)
similarity = 1 - distance[0, :]
# Transform the multi-vector indices into chunk indices, and then to chunk ids.
Expand Down Expand Up @@ -175,7 +175,7 @@ def reciprocal_rank_fusion(


def hybrid_search(
- query: str, *, oversample: int = 4, config: RAGLiteConfig | None = None
+ query: str, *, oversample: int = 1, config: RAGLiteConfig | None = None
) -> tuple[list[ChunkId], list[float]]:
"""Search chunks by combining ANN vector search with BM25 keyword search."""
config = config or RAGLiteConfig()
Expand Down

0 comments on commit 4f93902

Please sign in to comment.