-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
40c8bca
commit 50ec542
Showing
1 changed file
with
113 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,113 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 9, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"%%capture --no-stderr\n", | ||
"%pip install -U --quiet langchain-community tiktoken langchainhulangchain-openaib chromadb langchain langgraph langchain-text-splitters bs4 pysqlite3-binary\n", | ||
"\n", | ||
"# sqlite3" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 2, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import getpass\n", | ||
"import os\n", | ||
"\n", | ||
"\n", | ||
"def _set_env(key: str):\n", | ||
" if key not in os.environ:\n", | ||
" os.environ[key] = getpass.getpass(f\"{key}:\")\n", | ||
"\n", | ||
"\n", | ||
"_set_env(\"OPENAI_API_KEY\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"ename": "RuntimeError", | ||
"evalue": "\u001b[91mYour system has an unsupported version of sqlite3. Chroma requires sqlite3 >= 3.35.0.\u001b[0m\n\u001b[94mPlease visit https://docs.trychroma.com/troubleshooting#sqlite to learn how to upgrade.\u001b[0m", | ||
"output_type": "error", | ||
"traceback": [ | ||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", | ||
"\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", | ||
"Cell \u001b[0;32mIn[12], line 23\u001b[0m\n\u001b[1;32m 20\u001b[0m doc_splits \u001b[38;5;241m=\u001b[39m text_splitter\u001b[38;5;241m.\u001b[39msplit_documents(docs_list)\n\u001b[1;32m 22\u001b[0m \u001b[38;5;66;03m# Add to vectorDB\u001b[39;00m\n\u001b[0;32m---> 23\u001b[0m vectorstore \u001b[38;5;241m=\u001b[39m \u001b[43mChroma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_documents\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 24\u001b[0m \u001b[43m \u001b[49m\u001b[43mdocuments\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdoc_splits\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 25\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mrag-chroma\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 26\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mOpenAIEmbeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 27\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 28\u001b[0m retriever \u001b[38;5;241m=\u001b[39m vectorstore\u001b[38;5;241m.\u001b[39mas_retriever()\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.9.20/lib/python3.9/site-packages/langchain_community/vectorstores/chroma.py:878\u001b[0m, in \u001b[0;36mChroma.from_documents\u001b[0;34m(cls, documents, embedding, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)\u001b[0m\n\u001b[1;32m 876\u001b[0m texts \u001b[38;5;241m=\u001b[39m [doc\u001b[38;5;241m.\u001b[39mpage_content \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[1;32m 877\u001b[0m metadatas \u001b[38;5;241m=\u001b[39m [doc\u001b[38;5;241m.\u001b[39mmetadata \u001b[38;5;28;01mfor\u001b[39;00m doc \u001b[38;5;129;01min\u001b[39;00m documents]\n\u001b[0;32m--> 878\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_texts\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 879\u001b[0m \u001b[43m \u001b[49m\u001b[43mtexts\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtexts\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 880\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 881\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadatas\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmetadatas\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 882\u001b[0m \u001b[43m \u001b[49m\u001b[43mids\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mids\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 883\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 884\u001b[0m \u001b[43m \u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpersist_directory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 885\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient_settings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient_settings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 886\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 887\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 888\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.9.20/lib/python3.9/site-packages/langchain_community/vectorstores/chroma.py:814\u001b[0m, in \u001b[0;36mChroma.from_texts\u001b[0;34m(cls, texts, embedding, metadatas, ids, collection_name, persist_directory, client_settings, client, collection_metadata, **kwargs)\u001b[0m\n\u001b[1;32m 781\u001b[0m \u001b[38;5;129m@classmethod\u001b[39m\n\u001b[1;32m 782\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mfrom_texts\u001b[39m(\n\u001b[1;32m 783\u001b[0m \u001b[38;5;28mcls\u001b[39m: Type[Chroma],\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 793\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: Any,\n\u001b[1;32m 794\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Chroma:\n\u001b[1;32m 795\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Create a Chroma vectorstore from a raw documents.\u001b[39;00m\n\u001b[1;32m 796\u001b[0m \n\u001b[1;32m 797\u001b[0m \u001b[38;5;124;03m If a persist_directory is specified, the collection will be persisted there.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 812\u001b[0m \u001b[38;5;124;03m Chroma: Chroma vectorstore.\u001b[39;00m\n\u001b[1;32m 813\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m--> 814\u001b[0m chroma_collection \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mcls\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[1;32m 815\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_name\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 816\u001b[0m \u001b[43m \u001b[49m\u001b[43membedding_function\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43membedding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 817\u001b[0m \u001b[43m \u001b[49m\u001b[43mpersist_directory\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpersist_directory\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 818\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient_settings\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient_settings\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 819\u001b[0m \u001b[43m \u001b[49m\u001b[43mclient\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mclient\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 820\u001b[0m \u001b[43m \u001b[49m\u001b[43mcollection_metadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcollection_metadata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 821\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 822\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 823\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ids \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 824\u001b[0m ids \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mstr\u001b[39m(uuid\u001b[38;5;241m.\u001b[39muuid4()) \u001b[38;5;28;01mfor\u001b[39;00m _ \u001b[38;5;129;01min\u001b[39;00m texts]\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.9.20/lib/python3.9/site-packages/langchain_core/_api/deprecation.py:213\u001b[0m, in \u001b[0;36mdeprecated.<locals>.deprecate.<locals>.finalize.<locals>.warn_if_direct_instance\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 211\u001b[0m warned \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n\u001b[1;32m 212\u001b[0m emit_warning()\n\u001b[0;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mwrapped\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.9.20/lib/python3.9/site-packages/langchain_community/vectorstores/chroma.py:83\u001b[0m, in \u001b[0;36mChroma.__init__\u001b[0;34m(self, collection_name, embedding_function, persist_directory, client_settings, collection_metadata, client, relevance_score_fn)\u001b[0m\n\u001b[1;32m 81\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Initialize with a Chroma client.\"\"\"\u001b[39;00m\n\u001b[1;32m 82\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m---> 83\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\n\u001b[1;32m 84\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mchromadb\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mconfig\u001b[39;00m\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m:\n", | ||
"File \u001b[0;32m~/.pyenv/versions/3.9.20/lib/python3.9/site-packages/chromadb/__init__.py:86\u001b[0m\n\u001b[1;32m 84\u001b[0m sys\u001b[38;5;241m.\u001b[39mmodules[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msqlite3\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m sys\u001b[38;5;241m.\u001b[39mmodules\u001b[38;5;241m.\u001b[39mpop(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpysqlite3\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 85\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m---> 86\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mRuntimeError\u001b[39;00m(\n\u001b[1;32m 87\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[91mYour system has an unsupported version of sqlite3. Chroma \u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;124m requires sqlite3 >= 3.35.0.\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 89\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[94mPlease visit \u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m https://docs.trychroma.com/troubleshooting#sqlite to learn how \u001b[39m\u001b[38;5;130;01m\\\u001b[39;00m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124m to upgrade.\u001b[39m\u001b[38;5;130;01m\\033\u001b[39;00m\u001b[38;5;124m[0m\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m )\n\u001b[1;32m 95\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mconfigure\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 96\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Override Chroma's default settings, environment variables or .env files\"\"\"\u001b[39;00m\n", | ||
"\u001b[0;31mRuntimeError\u001b[0m: \u001b[91mYour system has an unsupported version of sqlite3. Chroma requires sqlite3 >= 3.35.0.\u001b[0m\n\u001b[94mPlease visit https://docs.trychroma.com/troubleshooting#sqlite to learn how to upgrade.\u001b[0m" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"# requires langsmith\n", | ||
"\n", | ||
"from langchain_community.document_loaders import WebBaseLoader\n", | ||
"from langchain_community.vectorstores import Chroma\n", | ||
"from langchain_openai import OpenAIEmbeddings\n", | ||
"from langchain_text_splitters import RecursiveCharacterTextSplitter\n", | ||
"\n", | ||
"urls = [\n", | ||
" \"https://lilianweng.github.io/posts/2023-06-23-agent/\",\n", | ||
" \"https://lilianweng.github.io/posts/2023-03-15-prompt-engineering/\",\n", | ||
" \"https://lilianweng.github.io/posts/2023-10-25-adv-attack-llm/\",\n", | ||
"]\n", | ||
"\n", | ||
"docs = [WebBaseLoader(url).load() for url in urls]\n", | ||
"docs_list = [item for sublist in docs for item in sublist]\n", | ||
"\n", | ||
"text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(\n", | ||
" chunk_size=100, chunk_overlap=50\n", | ||
")\n", | ||
"doc_splits = text_splitter.split_documents(docs_list)\n", | ||
"\n", | ||
"# Add to vectorDB\n", | ||
"vectorstore = Chroma.from_documents(\n", | ||
" documents=doc_splits,\n", | ||
" collection_name=\"rag-chroma\",\n", | ||
" embedding=OpenAIEmbeddings(),\n", | ||
")\n", | ||
"retriever = vectorstore.as_retriever()" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": ".venv", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.9.20" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |