-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
144 additions
and
156 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,5 @@ t.py | |
rag/ | ||
DockerFile | ||
s.txt | ||
__pycache__ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
Binary file not shown.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
File renamed without changes.
Binary file added
BIN
+3.91 KB
data/processed/chroma/014e8ddd-20b5-4475-a86b-1f5b513bf2b1/length.bin
Binary file not shown.
File renamed without changes.
Binary file removed
BIN
-3.91 KB
data/processed/chroma/be23e90f-70b1-487f-afee-13fec0ce30f7/length.bin
Binary file not shown.
Binary file not shown.
Binary file modified
BIN
+0 Bytes
(100%)
src/data_processing/__pycache__/get_embeddings.cpython-310.pyc
Binary file not shown.
Binary file modified
BIN
-408 Bytes
(86%)
src/database/__pycache__/chroma_search_functions.cpython-310.pyc
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
from data.process_data import load_documents, embed_and_store_documents, split_documents | ||
from langchain.prompts import ChatPromptTemplate | ||
from src.database.chroma_search_functions import get_relevant_data | ||
from src.models.models import llama_groq | ||
import os | ||
|
||
|
||
def format_context(context):
    """Join retrieved chunks into one string, each labeled and blank-line separated.

    Args:
        context: iterable of chunk strings (retrieval results).

    Returns:
        A single string like "Chunk 1: ...\n\nChunk 2: ...".
    """
    labeled_chunks = []
    for index, chunk in enumerate(context, start=1):
        labeled_chunks.append(f"Chunk {index}: {chunk}")
    return "\n\n".join(labeled_chunks)
|
||
|
||
def check_and_process_documents():
    """Build the Chroma vector store from source documents if it is missing.

    Idempotent: if the store directory already exists, nothing is (re)built.
    Progress is reported via prints, matching the file's logging style.
    """
    chroma_path = "data/processed/chroma"
    print(f"Checking if path exists: {chroma_path}")

    # Guard clause: an existing store means ingestion already ran.
    if os.path.exists(chroma_path):
        print(f"Path already exists: {chroma_path}")
        return

    print(f"Path does not exist: {chroma_path}")

    # Ingestion pipeline: load -> split -> embed & persist.
    raw_documents = load_documents()
    print("Documents loaded")

    document_chunks = split_documents(raw_documents)
    print("Documents split into chunks")

    embed_and_store_documents(document_chunks)
    print("Documents embedded and stored")
|
||
|
||
|
||
def reasoning(query, prompt):
    """Answer *query* via the RAG pipeline.

    Ensures the vector store exists, retrieves relevant chunks for the query,
    fills *prompt* (a template expecting {context} and {question}) with them,
    and asks the Groq-hosted LLM for the final response.

    Args:
        query: the user's question.
        prompt: prompt template string with {context} and {question} slots.

    Returns:
        The model's reply text.
    """
    check_and_process_documents()

    # Visual separator in the console output.
    print("#" * 100 + "\n\n")

    retrieved_context = get_relevant_data(query)

    # Render the template with the retrieved context and the question,
    # then hand the filled prompt to the model as its system message.
    template = ChatPromptTemplate.from_template(prompt)
    filled_prompt = template.format(context=retrieved_context, question=query)
    return llama_groq(query, filled_prompt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
from dotenv import load_dotenv | ||
from groq import Groq | ||
import cohere | ||
import os | ||
|
||
|
||
# Pull API keys (GROQ_API_KEY, COHERE_API_KEY) in from a local .env file.
load_dotenv()

# Groq client used for chat completions.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Cohere client used to rerank retrieved chunks.
co = cohere.Client(os.getenv("COHERE_API_KEY"))
|
||
|
||
def llama_groq(query, prompt):
    """Ask Groq's Llama 3 70B model to answer *query* under *prompt*.

    Args:
        query: the user's message.
        prompt: system message that sets the assistant's behavior.

    Returns:
        The assistant's reply text (first choice of the completion).
    """
    conversation = [
        # System turn: fixes the assistant's behavior for the exchange.
        {"role": "system", "content": prompt},
        # User turn the model responds to.
        {"role": "user", "content": query},
    ]

    chat_completion = client.chat.completions.create(
        messages=conversation,
        # Target model hosted on Groq.
        model="llama3-70b-8192",
        # Moderate randomness; lower values make output more deterministic.
        temperature=0.5,
        # Upper bound on generated tokens for this completion.
        max_tokens=1024,
        # Nucleus sampling: 1 considers the full probability mass.
        top_p=1,
        # No custom stop sequences.
        stop=None,
        # Return the full response at once rather than streaming deltas.
        stream=False,
    )

    return chat_completion.choices[0].message.content
|
||
|
||
def cohere_reranker(query, valid_chunks, top_k=3):
    """Rerank *valid_chunks* by relevance to *query* using Cohere's rerank API.

    Args:
        query: the search query to score chunks against.
        valid_chunks: candidate documents/chunks to reorder.
        top_k: how many of the best-scoring chunks to return (default 3).

    Returns:
        The Cohere rerank response containing the top *top_k* results.
    """
    reranked = co.rerank(
        model="rerank-english-v2.0",
        query=query,
        documents=valid_chunks,
        top_n=top_k,
    )
    return reranked