forked from vara-prasad-07/Group_AC
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathembeddings.py
More file actions
50 lines (38 loc) · 1.73 KB
/
Copy pathembeddings.py
File metadata and controls
50 lines (38 loc) · 1.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import json
from typing import List
from sentence_transformers import SentenceTransformer
class EmbeddingGenerator:
    """Generate embeddings using a sentence-transformers model.

    Methods
    - embed_text(text) -> List[float]
    - embed_texts(list_of_texts) -> List[List[float]]
    - load_tickets(file_path) -> List[dict]
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Instantiate the ``SentenceTransformer`` identified by *model_name*.

        The name is kept on ``self.model_name`` and the loaded model on
        ``self.model``.
        """
        self.model_name = model_name
        self.model = SentenceTransformer(self.model_name)

    @staticmethod
    def _as_list(vector) -> list:
        """Convert one encoder output to a plain Python list.

        Objects exposing ``tolist()`` are converted with it; anything else
        is passed through ``list()``.
        """
        return vector.tolist() if hasattr(vector, "tolist") else list(vector)

    def embed_text(self, text: str) -> List[float]:
        """Return a single embedding as a Python list."""
        emb = self.model.encode(text)
        return self._as_list(emb)

    def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Return embeddings for a list of texts, one list per input text.

        An empty list/tuple yields ``[]`` without invoking the model.
        """
        # Only short-circuit real empty sequences; an empty *string* must
        # still go to the encoder exactly as before.
        if isinstance(texts, (list, tuple)) and not texts:
            return []
        embs = self.model.encode(texts)
        return [self._as_list(e) for e in embs]

    @staticmethod
    def load_tickets(file_path: str = "tickets.json") -> List[dict]:
        """Load a tickets JSON file and return the list of ticket dicts.

        Two layouts are accepted:

        - an object with a ``"tickets"`` key, whose value is returned
          (``[]`` when the key is absent);
        - a top-level array, which is returned as-is.

        Raises:
            ValueError: if the top-level JSON value is neither an object
                nor an array.
            OSError / json.JSONDecodeError: propagated from ``open`` and
                ``json.load``.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        if isinstance(data, dict):
            return data.get("tickets", [])
        if isinstance(data, list):
            # Bare array of tickets: previously crashed on ``data.get``.
            return data
        raise ValueError(
            f"Unsupported tickets JSON in {file_path!r}: expected an object "
            f"with a 'tickets' key or a list, got {type(data).__name__}"
        )
if __name__ == "__main__":
    # Demo: embed every ticket's customer query and print a short preview.

    # Load JSON data
    tickets = EmbeddingGenerator.load_tickets("tickets.json")

    # Extract the texts
    texts = [item["customer_query"] for item in tickets]

    # Generate embeddings
    generator = EmbeddingGenerator()
    embeddings = generator.embed_texts(texts)

    # Print embeddings: the real vector length plus the first 5 values for
    # brevity (the old label called the 5-value preview the "size").
    for ticket, emb in zip(tickets, embeddings):
        print(
            f"ID: {ticket['ticket_id']}, Text: {ticket['customer_query']}, "
            f"Embedding vector size: {len(emb)}, first 5 values: {emb[:5]}..."
        )