-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathvector_store.py
More file actions
67 lines (61 loc) · 1.86 KB
/
vector_store.py
File metadata and controls
67 lines (61 loc) · 1.86 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import redis
import numpy as np
import pickle
from dotenv import load_dotenv
# Load environment variables (python-dotenv) before REDIS_URL is read below,
# so a value supplied that way is visible to os.getenv.
load_dotenv()
# Redis connection URL; defaults to a local server, database 0, when the
# REDIS_URL environment variable is not set.
REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0")
# Module-level Redis client built from REDIS_URL and shared by every
# function in this module.
r = redis.Redis.from_url(REDIS_URL)
def store_embeddings(chunks: list, embeddings: list):
    """
    Stores each chunk and its embedding in Redis.

    Each entry is stored as a hash with two fields: "text" (the chunk) and
    "embedding" (the embedding serialized with pickle).
    Key: "chunk:{i}", where i is the position of the pair, starting at 0.

    A key that already exists for the same index is overwritten. Keys with a
    higher index left behind by an earlier, longer call are NOT removed here.

    Raises:
        ValueError: if chunks and embeddings do not have the same length.
    """
    # zip() would silently drop the unmatched tail, losing data without any
    # signal to the caller, so reject mismatched inputs up front.
    if len(chunks) != len(embeddings):
        raise ValueError(
            "chunks and embeddings must have the same length "
            f"(got {len(chunks)} and {len(embeddings)})"
        )
    # Nothing to write: skip creating a pipeline altogether.
    if len(chunks) == 0:
        return

    # Queue every HSET and send them to Redis in a single batch; the context
    # manager resets the pipeline even if queuing or executing fails.
    with r.pipeline() as pipe:
        for i, (chunk, emb) in enumerate(zip(chunks, embeddings)):
            pipe.hset(
                f"chunk:{i}",
                mapping={
                    "text": chunk,
                    # NOTE(security): pickle is only safe to read back if the
                    # Redis instance is trusted; see pickle.loads on the read
                    # path.
                    "embedding": pickle.dumps(emb),
                },
            )
        pipe.execute()
def get_all_chunks_and_embeddings():
    """
    Retrieves all chunks and their embeddings from Redis.

    Returns a list of (text, embedding) tuples, one per "chunk:*" hash.
    The list follows Redis key-iteration order; it is not sorted by chunk
    index.
    """
    # SCAN walks the keyspace incrementally instead of blocking the server the
    # way KEYS does. SCAN may report the same key more than once, so
    # de-duplicate while keeping first-seen order.
    keys = list(dict.fromkeys(r.scan_iter(match="chunk:*")))
    if not keys:
        return []

    # Fetch every hash in one round trip rather than one HGETALL per key.
    # No transaction is needed: these are independent reads.
    pipe = r.pipeline(transaction=False)
    for key in keys:
        pipe.hgetall(key)
    records = pipe.execute()

    results = []
    for data in records:
        if not data:
            # HGETALL on a missing key yields an empty mapping: the key was
            # deleted between the scan and the fetch, so there is nothing to
            # return for it.
            continue
        text = data[b"text"].decode("utf-8")
        # NOTE(security): pickle.loads can execute arbitrary code. This is
        # only safe while every writer to this Redis instance is trusted.
        emb = pickle.loads(data[b"embedding"])
        results.append((text, emb))
    return results
def query_top_k(query_embedding, k=3):
    """
    Finds the top k most similar chunks to the query_embedding using cosine similarity.

    Returns a list of (text, score) tuples ordered from most to least
    similar. Returns an empty list when k is zero or negative, or when no
    chunks are stored. Passing k=None returns every chunk, ranked.
    """
    # A negative k would turn scores[:k] into "all but the last |k|" results,
    # which is never what a top-k caller wants; also skip the Redis fetch.
    if k is not None and k <= 0:
        return []

    all_chunks = get_all_chunks_and_embeddings()
    if not all_chunks:
        return []

    query_vec = np.array(query_embedding)
    # The query norm is the same for every comparison, so compute it once.
    query_norm = np.linalg.norm(query_vec)

    scores = []
    for text, emb in all_chunks:
        emb_vec = np.array(emb)
        # Cosine similarity; the small epsilon avoids division by zero when
        # either vector has zero length.
        sim = np.dot(query_vec, emb_vec) / (query_norm * np.linalg.norm(emb_vec) + 1e-8)
        scores.append((text, sim))

    # Sort by similarity descending
    scores.sort(key=lambda pair: pair[1], reverse=True)
    return scores[:k]
def clear_all_chunks():
    """
    Deletes all chunk:* keys from Redis.
    """
    # Upper bound on keys per DEL command, so a large keyspace never produces
    # one enormous command.
    batch_size = 500
    batch = []
    # SCAN iterates incrementally instead of blocking the server like KEYS.
    for key in r.scan_iter(match="chunk:*"):
        batch.append(key)
        if len(batch) >= batch_size:
            r.delete(*batch)
            batch = []
    # Flush whatever is left after the final partial batch.
    if batch:
        r.delete(*batch)