-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathexamples_proto.py
More file actions
89 lines (74 loc) · 3.4 KB
/
examples_proto.py
File metadata and controls
89 lines (74 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# Importing necessary modules and libraries
import os # For interacting with the operating system
import getpass # For securely getting user input (e.g., passwords)
import json # For handling JSON data
from typing import List, Dict # For type hinting
# Importing specific modules for document handling and embeddings
from langchain_core.documents import Document
from sentence_transformers import SentenceTransformer
# Importing OpenAI embeddings module (commented out)
# from langchain_openai import OpenAIEmbeddings
from sklearn.metrics.pairwise import (
cosine_similarity,
) # For calculating cosine similarity
import numpy as np # For numerical computing
# Defining a class for simple vector store operations
class SimpleVectorStore:
    """Minimal in-memory vector store over precomputed embeddings.

    The store never embeds documents itself: ``embeddings`` and
    ``documents`` are supplied by the caller and are expected to be
    index-aligned (row ``i`` of ``embeddings`` belongs to ``documents[i]``).
    """

    def __init__(self, embeddings, documents):
        """Keep references to the embeddings and their documents.

        Args:
            embeddings: Collection of document embeddings; it is passed
                unchanged as the second argument of ``cosine_similarity``.
            documents: Sequence of documents, indexed in the same order as
                ``embeddings``.
        """
        self.embeddings = embeddings
        self.documents = documents

    def similarity_search(self, query, embed_function, k: int = 5) -> list:
        """Return up to ``k`` documents ranked by cosine similarity to ``query``.

        Args:
            query: Text to search for.
            embed_function: Object exposing ``encode(list_of_texts)``; the
                first element of its result is used as the query embedding.
            k: Maximum number of documents to return.

        Returns:
            Documents ordered from most to least similar. The list is empty
            when ``k`` is not positive or the store holds no documents.
        """
        # Guard first: a slice such as ``[-0:]`` would select *every* index,
        # and an empty store has nothing to compare against.
        if k <= 0 or len(self.documents) == 0:
            return []

        query_embedding = embed_function.encode([query])[0]
        similarities = cosine_similarity([query_embedding], self.embeddings)[0]
        # argsort is ascending, so reverse it and keep the first k indices.
        top_k_indices = np.argsort(similarities)[::-1][:k]
        return [self.documents[i] for i in top_k_indices]
# Defining a class for retrieving examples
class ExampleRetriever:
    """Retrieve stored query/solution examples similar to a given query.

    At construction every ``.json`` file directly inside ``directory`` is
    read, each example's ``"query"`` is embedded with ``embed_function``,
    and the results are kept in a ``SimpleVectorStore`` for later lookup.
    """

    # Environment-variable name for an OpenAI key; presumably meant to be
    # passed as ``keyname`` -- nothing in this class reads it directly.
    OPENAI = "OPENAI_API_KEY"

    def __init__(self, embed_function, keyname=None, directory="examples/examples"):
        """Load the examples from ``directory`` and index their queries.

        Args:
            embed_function: Object exposing ``encode(list_of_texts)``; used
                here for the documents and later for each query.
            keyname: Optional environment-variable name. When given, the
                user is prompted (without echo) and the answer is stored in
                ``os.environ[keyname]``, replacing any existing value.
            directory: Folder scanned (non-recursively) for ``.json`` files.

        Raises:
            OSError: If ``directory`` or one of its files cannot be read.
            json.JSONDecodeError: If a ``.json`` file is not valid JSON.
            KeyError: If an example lacks ``"query"`` or ``"solution"``.
        """
        self.function = embed_function
        if keyname is not None:
            os.environ[keyname] = getpass.getpass("API Key:")

        docs = self._load_documents(directory)
        embeddings = embed_function.encode([doc.page_content for doc in docs])
        self.vectorstore = SimpleVectorStore(embeddings, docs)

    @staticmethod
    def _load_documents(directory):
        """Build one ``Document`` per example found in ``directory``."""
        docs = []
        # os.listdir returns names in arbitrary order; sorting keeps the
        # index (and tie-breaking between equal scores) reproducible.
        for filename in sorted(os.listdir(directory)):
            path = os.path.join(directory, filename)
            if not filename.endswith(".json") or not os.path.isfile(path):
                continue
            # Decode explicitly as UTF-8 rather than the locale default.
            with open(path, "r", encoding="utf-8") as file:
                data = json.load(file)
            # A file holds either a list of examples or a single example.
            examples = data if isinstance(data, list) else [data]
            docs.extend(
                Document(
                    page_content=json.dumps(example["query"]),
                    metadata={
                        "source": filename,
                        "solution": example["solution"],
                    },
                )
                for example in examples
            )
        return docs

    def retrieve(self, query, k=5):
        """Return the ``k`` stored examples most similar to ``query``.

        Args:
            query: Text to look up.
            k: Maximum number of examples to return.

        Returns:
            A list of ``{"query": ..., "solution": ...}`` dicts in the order
            produced by the vector store. ``"query"`` is the stored page
            content, i.e. the JSON-encoded form of the original query.
        """
        response_docs = self.vectorstore.similarity_search(query, self.function, k=k)
        return [
            {"query": doc.page_content, "solution": doc.metadata["solution"]}
            for doc in response_docs
        ]
# Function for creating a SentenceTransformer model
def bge(model="all-MiniLM-L6-v2", device="cpu"):
    """Create a ``SentenceTransformer`` embedding model.

    Args:
        model: Model name or path handed to ``SentenceTransformer``.
        device: Device identifier forwarded as the ``device`` keyword.

    Returns:
        The constructed ``SentenceTransformer`` instance.
    """
    options = {"device": device}
    return SentenceTransformer(model, **options)
# Function for creating an OpenAI embeddings model (commented out)
# def openai(model_name="text-embedding-ada-002"):
# return OpenAIEmbeddings(model=model_name)