Skip to content
This repository was archived by the owner on Jan 5, 2025. It is now read-only.

Commit f4afbe2

Browse files
authored
Merge pull request #240 from lvalics/main
Metadata, delete chatbot namespace and other changes.
2 parents b0b49eb + 42d4bb0 commit f4afbe2

17 files changed

+897
-401
lines changed
+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"[python]": {
3-
"editor.defaultFormatter": "ms-python.python"
3+
"editor.defaultFormatter": "ms-python.black-formatter"
44
},
55
"python.formatting.provider": "none"
66
}

dj_backend_server/CHANGELOG.MD

+7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,10 @@
1+
2.20.2024
2+
- Implemented functionality to delete a chatbot namespace from the vector database, along with all records associated with that chatbot, upon chatbot deletion.
3+
- The Directory Data Loader must be updated to include filename metadata to enable filtering (PR #138).
4+
- Enhanced the history log interface to display whether a message has received positive or negative feedback.
5+
- Fixed page-title handling during crawling; the title is now saved to the database.
6+
- When deleting a document whose namespace does not exist, no error is thrown; the file is deleted instead.
7+
18
2.18.2024
29
- The conversational retrieval functionality is now operating as expected. It successfully sends the conversation history to the language model, allowing the context from previous interactions to be utilized effectively.
310
- Added support for Ollama as the Language Model (LLM). Ensure Ollama is specified in the .env configuration and the model is preloaded on the server.
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,57 @@
11
# views.py
2+
import logging.config
23
from django.http import JsonResponse
34
from django.views.decorators.csrf import csrf_exempt
4-
from langchain.text_splitter import RecursiveCharacterTextSplitter
5-
from api.utils import get_embeddings
6-
from langchain_community.document_loaders import GitLoader
5+
from django.conf import settings
76
from api.utils import init_vector_store
7+
from api.utils import get_embeddings
88
from api.interfaces import StoreOptions
9+
from langchain_community.document_loaders import GitLoader
10+
from langchain.text_splitter import RecursiveCharacterTextSplitter
11+
from web.models.codebase_data_sources import CodebaseDataSource
12+
from typing import Optional, Dict, Any, List
13+
14+
logging.config.dictConfig(settings.LOGGING)
15+
logger = logging.getLogger(__name__)
16+
917

1018
# https://python.langchain.com/docs/integrations/document_loaders/git
1119
@csrf_exempt
12-
def codebase_handler(repo_path: str, namespace: str):
20+
def codebase_handler(repo_path: str, namespace: str, metadata: Dict[str, Any]):
1321
try:
1422
folder_path = f"website_data_sources/{namespace}"
1523
loader = GitLoader(repo_path=folder_path, clone_url=repo_path, branch="master")
1624

1725
raw_docs = loader.load()
26+
logging.debug("Loaded documents")
27+
for doc in raw_docs:
28+
doc.metadata = (
29+
getattr(doc, "metadata", {})
30+
if getattr(doc, "metadata", {}) is not None
31+
else {}
32+
)
1833

19-
print('Loaded documents')
20-
21-
text_splitter = RecursiveCharacterTextSplitter(separators=["\n"], chunk_size=1000, chunk_overlap=200,length_function=len)
34+
text_splitter = RecursiveCharacterTextSplitter(
35+
separators=["\n"], chunk_size=1000, chunk_overlap=200, length_function=len
36+
)
2237
docs = text_splitter.split_documents(raw_docs)
2338

2439
embeddings = get_embeddings()
2540

26-
init_vector_store(docs, embeddings, options=StoreOptions(namespace))
41+
init_vector_store(
42+
docs,
43+
embeddings,
44+
options=StoreOptions(namespace),
45+
metadata={
46+
"bot_id": str(CodebaseDataSource.chatbot.id),
47+
"repository": str(CodebaseDataSource.chatbot.id),
48+
"last_update": CodebaseDataSource.ingested_at.strftime(
49+
"%Y-%m-%d %H:%M:%S"
50+
),
51+
"type": "codebase",
52+
},
53+
)
2754

28-
print('Indexed documents. all done!')
55+
print("Indexed documents. all done!")
2956
except Exception as e:
30-
print(e)
57+
print(e)

0 commit comments

Comments
 (0)