Skip to content

Commit cb153e8

Browse files
committed
Update links on README.md, ingest.sh and ingest.py
Replace ChatVectorDBChain with ConversationalRetrievalChain. Add tiktoken to requirements.txt.
1 parent bb3e122 commit cb153e8

File tree

7 files changed

+19
-16
lines changed

7 files changed

+19
-16
lines changed

.gitignore

+3-1
Original file line numberDiff line numberDiff line change
@@ -136,4 +136,6 @@ dmypy.json
136136
.DS_Store
137137

138138
vectorstore.pkl
139-
langchain.readthedocs.io/
139+
python.langchain.com/
140+
141+
.venv

README.md

+3-3
Original file line numberDiff line numberDiff line change
@@ -30,11 +30,11 @@ There are two components: ingestion and question-answering.
3030
Ingestion has the following steps:
3131

3232
1. Pull html from documentation site
33-
2. Load html with LangChain's [ReadTheDocs Loader](https://langchain.readthedocs.io/en/latest/modules/document_loaders/examples/readthedocs_documentation.html)
34-
3. Split documents with LangChain's [TextSplitter](https://langchain.readthedocs.io/en/latest/reference/modules/text_splitter.html)
33+
2. Load html with LangChain's [ReadTheDocs Loader](https://python.langchain.com/en/latest/modules/document_loaders/examples/readthedocs_documentation.html)
34+
3. Split documents with LangChain's [TextSplitter](https://python.langchain.com/en/latest/reference/modules/text_splitter.html)
3535
4. Create a vectorstore of embeddings, using LangChain's [vectorstore wrapper](https://python.langchain.com/en/latest/modules/indexes/vectorstores.html) (with OpenAI's embeddings and FAISS vectorstore).
3636

37-
Question-Answering has the following steps, all handled by [ChatVectorDBChain](https://langchain.readthedocs.io/en/latest/modules/indexes/chain_examples/chat_vector_db.html):
37+
Question-Answering has the following steps, all handled by [ChatVectorDBChain](https://python.langchain.com/en/latest/modules/indexes/chain_examples/chat_vector_db.html):
3838

3939
1. Given the chat history and new user input, determine what a standalone question would be (using GPT-3).
4040
2. Given that standalone question, look up relevant documents from the vectorstore.

ingest.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
def ingest_docs():
1111
"""Get documents from web pages."""
12-
loader = ReadTheDocsLoader("langchain.readthedocs.io/en/latest/")
12+
loader = ReadTheDocsLoader("python.langchain.com/en/latest/")
1313
raw_documents = loader.load()
1414
text_splitter = RecursiveCharacterTextSplitter(
1515
chunk_size=1000,

ingest.sh

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22
# This involves scraping the data from the web and then cleaning up and putting in Weaviate.
33
# Error if any command fails
44
set -e
5-
wget -r -A.html https://langchain.readthedocs.io/en/latest/
5+
wget -r -A.html https://python.langchain.com/en/latest/
66
python3 ingest.py

main.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,10 @@ async def websocket_endpoint(websocket: WebSocket):
3838
question_handler = QuestionGenCallbackHandler(websocket)
3939
stream_handler = StreamingLLMCallbackHandler(websocket)
4040
chat_history = []
41-
qa_chain = get_chain(vectorstore, question_handler, stream_handler)
41+
qa_chain = await get_chain(vectorstore, question_handler, stream_handler)
4242
# Use the below line instead of the above line to enable tracing
4343
# Ensure `langchain-server` is running
44-
# qa_chain = get_chain(vectorstore, question_handler, stream_handler, tracing=True)
44+
# qa_chain = await get_chain(vectorstore, question_handler, stream_handler, tracing=True)
4545

4646
while True:
4747
try:

query_data.py

+8-8
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
"""Create a ChatVectorDBChain for question/answering."""
2-
from langchain.callbacks.base import AsyncCallbackManager
2+
from langchain.callbacks.manager import AsyncCallbackManager
33
from langchain.callbacks.tracers import LangChainTracer
4-
from langchain.chains import ChatVectorDBChain
4+
from langchain.chains import ConversationalRetrievalChain
55
from langchain.chains.chat_vector_db.prompts import (CONDENSE_QUESTION_PROMPT,
66
QA_PROMPT)
77
from langchain.chains.llm import LLMChain
@@ -10,11 +10,11 @@
1010
from langchain.vectorstores.base import VectorStore
1111

1212

13-
def get_chain(
13+
async def get_chain(
1414
vectorstore: VectorStore, question_handler, stream_handler, tracing: bool = False
15-
) -> ChatVectorDBChain:
16-
"""Create a ChatVectorDBChain for question/answering."""
17-
# Construct a ChatVectorDBChain with a streaming llm for combine docs
15+
) -> ConversationalRetrievalChain:
16+
"""Create a ConversationalRetrievalChain for question/answering."""
17+
# Construct a ConversationalRetrievalChain with a streaming llm for combine docs
1818
# and a separate, non-streaming llm for question generation
1919
manager = AsyncCallbackManager([])
2020
question_manager = AsyncCallbackManager([question_handler])
@@ -45,8 +45,8 @@ def get_chain(
4545
streaming_llm, chain_type="stuff", prompt=QA_PROMPT, callback_manager=manager
4646
)
4747

48-
qa = ChatVectorDBChain(
49-
vectorstore=vectorstore,
48+
qa = ConversationalRetrievalChain(
49+
retriever=vectorstore.as_retriever(),
5050
combine_docs_chain=doc_chain,
5151
question_generator=question_generator,
5252
callback_manager=manager,

requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ faiss-cpu
1111
bs4
1212
unstructured
1313
libmagic
14+
tiktoken

0 commit comments

Comments
 (0)