diff --git a/.devcontainer b/.devcontainer new file mode 120000 index 00000000..f633b256 --- /dev/null +++ b/.devcontainer @@ -0,0 +1 @@ +devcontainer \ No newline at end of file diff --git a/Dockerfile b/Dockerfile index ba87ae6c..ab83de8f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,11 @@ FROM python:3.11-slim-bookworm + +ARG USERNAME=llmware +ARG USER_UID=1000 +ARG USER_GID=$USER_UID +ENV PYTHONPATH=/llmware + + RUN apt-get update \ && apt-get install -y --no-install-recommends git bash \ && apt-get purge -y --auto-remove @@ -7,6 +14,11 @@ RUN git clone https://github.com/llmware-ai/llmware.git RUN /llmware/scripts/dev/load_native_libraries.sh RUN cd llmware/llmware && pip install -r requirements.txt + +# Create the user +RUN groupadd --gid $USER_GID $USERNAME \ + && useradd --uid $USER_UID --gid $USER_GID -m $USERNAME \ + && chown -R $USERNAME:$USER_GID /llmware ENV PYTHONPATH=/llmware WORKDIR /llmware diff --git a/README.md b/README.md index 6b9d2961..dd6e8d77 100644 --- a/README.md +++ b/README.md @@ -5,9 +5,20 @@ `llmware` is a unified framework for developing LLM-based application patterns including Retrieval Augmented Generation (RAG). This project provides an integrated set of tools that anyone can use - from a beginner to the most sophisticated AI developer - to rapidly build industrial-grade, knowledge-based enterprise LLM applications with specific focus on making it easy to integrate open source small specialized models and connecting enterprise knowledge safely and securely. -[Join us on Discord](https://discord.gg/MhZn5Nc39h) | [Watch Youtube Tutorials](https://www.youtube.com/@llmware) | [Explore our Model Families on Huggingface](https://www.huggingface.co/llmware) +[Join us on Discord](https://discord.gg/MhZn5Nc39h) | [Watch Youtube Tutorials](https://www.youtube.com/@llmware) | [Explore our Model Families on Huggingface](https://www.huggingface.co/llmware) -## 🎯 Key features + +πŸ”₯πŸ”₯πŸ”₯ [**Multi-Model Agents with SLIM Models**](examples/SLIM-Agents/) - [**Intro-Video**](https://www.youtube.com/watch?v=cQfdaTcmBpY) πŸ”₯πŸ”₯πŸ”₯ +Can't wait? Get SLIMs right away: + +```python +from llmware.models import ModelCatalog + +ModelCatalog().get_llm_toolkit() # get all SLIM models, delivered as small, fast quantized tools +ModelCatalog().tool_test_run("slim-sentiment-tool") # see the model in action with test script included +``` + +## 🎯 Key features Writing code with`llmware` is based on a few main concepts:
@@ -66,10 +77,10 @@ lib.install_new_embedding(embedding_model_name="industry-bert-sec", vector_db="f # easy to create multiple libraries for different projects and groups finance_lib = Library().create_new_library("finance_q4_2023") -finance_lib.add_files("/finance_folder/" +finance_lib.add_files("/finance_folder/") hr_lib = Library().create_new_library("hr_policies") -hr_lib.add_files("/hr_folder/" +hr_lib.add_files("/hr_folder/") # pull library card with key metadata - documents, text chunks, images, tables, embedding record lib_card = Library().get_library_card("my_library") @@ -520,6 +531,8 @@ if __name__ == "__main__": ## πŸ”₯ What's New? πŸ”₯ +-**Multi-Model Agents with SLIM models** - multi-step Agents with SLIMs on CPU - [video](https://www.youtube.com/watch?v=cQfdaTcmBpY) - [example](examples/SLIM-Agents) + -**Fast start with no db installation** - SQLite (text collection) and FAISS (vector file database) - [example](examples/Getting_Started/configure_db.py) -**Postgres integration** as option for text collection with PGVector support ([example](examples/Embedding/using_pg_vector.py)) @@ -539,23 +552,26 @@ if __name__ == "__main__":
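The library snippets above can be strung together into a minimal end-to-end flow. A hedged sketch - the folder path and query text are placeholders, and the semantic query call follows the pattern used in the ChromaDB example later in this document:

```python
from llmware.library import Library
from llmware.retrieval import Query

# create a library, parse and index a folder of documents, then add a vector embedding
lib = Library().create_new_library("finance_q4_2023")
lib.add_files("/finance_folder/")          # placeholder folder of source documents
lib.install_new_embedding(embedding_model_name="industry-bert-sec", vector_db="faiss")

# run a semantic query against the embedded library
results = Query(lib).semantic_query("what are the key risk factors?", result_count=10)
for i, hit in enumerate(results):
    print("semantic result: ", i, hit)
```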
Step 2- Go to Examples - Get Started Fast with 50+ 'Cut-and-Paste' Recipes +## πŸ”₯ Top New Examples πŸ”₯ + +New to LLMWare - [**Fast Start tutorial series**](https://github.com/llmware-ai/llmware/tree/main/fast_start) +SLIM Examples - [**SLIM Models**](examples/SLIM-Agents/) + | Example | Detail | |-------------|--------------| -| 1. Getting Started ([code](examples/Getting_Started/getting_started_with_rag.py) / [video](https://www.youtube.com/watch?v=0naqpH93eEU)) | End-to-end Basic RAG Recipe illustrating key LLMWare classes. | -| 2. Prompts ([code](examples/Prompts/llm_prompts.py)) | Prompt LLMs with various sources, explore the out-of-the-box Prompt Catalog, and use different prompt styles.| -| 3. Retrieval ([code](examples/Retrieval/semantic_retrieval.py)) | Explore the breadth of retrieval capabilities and persisting, loading and saving retrieval history.| -| 4. Embedding ([code](examples/Embedding/embeddings_fast_start.py)) | Simple access to multiple embedding models and vector DBs (β€œmix and match”). -| 5. Parsing ([code](examples/Parsing/parse_documents.py)) | Ingest at scale into library and β€˜at runtime' into any Prompt. -| 6. Prompts With Sources ([code](examples/Prompts/prompt_with_sources.py)) | Attach wide range of knowledge sources directly into Prompts. -| 7. BLING models ([code](examples/Models/bling_fast_start.py) / [video](https://www.youtube.com/watch?v=JjgqOZ2v5oU)) | Explore `llmware`'s BLING model series ("Best Little Instruction-following No-GPU-required"). See how they perform in common RAG scenarios - question-answering, key-value extraction, and basic summarization. | -| 8. RAG with BLING ([code](examples/RAG/contract_analysis_on_laptop_with_bling_models.py) / [video](https://www.youtube.com/watch?v=8aV5p3tErP0)) | Using contract analysis as an example, experiment with RAG for complex document analysis and text extraction using `llmware`'s BLING ~1B parameter GPT model running on your laptop. | -| 9. DRAGON RAG benchmark testing with huggingface ([code](examples/Models/dragon_rag_benchmark_tests_huggingface.py)) | Run RAG instruct benchmark tests against the `llmware` DRAGON models to find the best one for your RAG workflow. This example uses basic Transformer APIs. | -| 10. DRAGON RAG benchmark testing with llmware ([code](examples/Models/dragon_rag_benchmark_tests_llmware.py)) | Run RAG instruct benchmark tests against the `llmware` DRAGON models to find the best one for your RAG workflow. This example uses the llmware Prompt API which provides additional capabilities such as evidence/fact checking | -| 11. Fact Checking ([code](examples/Prompts/fact_checking.py)) | Explore the full set of evidence methods in this example script that analyzes a set of contracts. | -| 12. Working with Prompts ([code](examples/Getting_Started/working_with_prompts.py)) | Inspection of Prompt history which is useful in AI Audit scenarios.| -| 13. Hugging Face Integration ([code](examples/Models/huggingface_integration.py)) | How to bring your favorite HF model into llmware seamlessly. Customize a generative model with weights from a custom fine-tuned model. | -| 14. Working with Datasets ([code](examples/Datasets/working_with_datasets.py)) | Dataset generation streamlined for fine-tuning generative and embedding models and formats such as Alpaca, ChatGPT, Human-Bot. | -| 15. 
Working without Databases ([code](examples/Getting_Started/working_without_a_database.py) / [video](https://www.youtube.com/watch?v=tAGz6yR14lw))| Parse, Prompt and generate Datasets from Prompt history without installing MongoDB or a vector database.| +| 1. BLING models fast start ([code](examples/Models/bling_fast_start.py) / [video](https://www.youtube.com/watch?v=JjgqOZ2v5oU)) | Get started with fast, accurate, CPU-based models - question-answering, key-value extraction, and basic summarization. | +| 2. Parse and Embed 500 PDF Documents ([code](examples/Embedding/docs2vecs_with_milvus-un_resolutions.py)) | End-to-end example for Parsing, Embedding and Querying UN Resolution documents with Milvus | +| 3. Hybrid Retrieval - Semantic + Text ([code](examples/Retrieval/dual_pass_with_custom_filter.py)) | Using 'dual pass' retrieval to combine best of semantic and text search | +| 4. Multiple Embeddings with PG Vector ([code](examples/Embedding/using_multiple_embeddings.py) / [video](https://www.youtube.com/watch?v=Bncvggy6m5Q)) | Comparing Multiple Embedding Models using Postgres / PG Vector | +| 5. DRAGON GGUF Models ([code](examples/Models/dragon_gguf_fast_start.py) / [video](https://www.youtube.com/watch?v=BI1RlaIJcsc&t=130s)) | State-of-the-Art 7B RAG GGUF Models. | +| 6. RAG with BLING ([code](examples/RAG/contract_analysis_on_laptop_with_bling_models.py) / [video](https://www.youtube.com/watch?v=8aV5p3tErP0)) | Using contract analysis as an example, experiment with RAG for complex document analysis and text extraction using `llmware`'s BLING ~1B parameter GPT model running on your laptop. | +| 7. Master Service Agreement Analysis with DRAGON ([code](examples/RAG/msa_processing.py) / [video](https://www.youtube.com/watch?v=Cf-07GBZT68&t=2s)) | Analyzing MSAs using DRAGON YI 6B Model. | +| 8. Streamlit Example ([code](examples/Getting_Started/ui_without_a_database.py)) | Upload pdfs, and run inference on llmware BLING models. | +| 9. Integrating LM Studio ([code](examples/Models/using-open-chat-models.py) / [video](https://www.youtube.com/watch?v=h2FDjUyvsKE&t=101s)) | Integrating LM Studio Models with LLMWare | +| 10. Prompts With Sources ([code](examples/Prompts/prompt_with_sources.py)) | Attach wide range of knowledge sources directly into Prompts. | +| 11. Fact Checking ([code](examples/Prompts/fact_checking.py)) | Explore the full set of evidence methods in this example script that analyzes a set of contracts. | +| 12. Using 7B GGUF Chat Models ([code](examples/Models/chat_models_gguf_fast_start.py)) | Using 4 state of the art 7B chat models in minutes running locally | + Check out: [llmware examples](https://github.com/llmware-ai/llmware/blob/main/examples/README.md) @@ -564,20 +580,27 @@ Check out: [llmware examples](https://github.com/llmware-ai/llmware/blob/main/e
Step 3 - Tutorial Videos - check out our Youtube channel for high-impact 5-10 minute tutorials on the latest examples. -🎬 Check out these videos on how to quickly get started with RAG: +🎬 Check out these videos to get started quickly: +- [SLIM Models Intro](https://www.youtube.com/watch?v=cQfdaTcmBpY) +- [RAG with BLING on your laptop](https://www.youtube.com/watch?v=JjgqOZ2v5oU) +- [DRAGON-7B-Models](https://www.youtube.com/watch?v=d_u7VaKu6Qk&t=37s) +- [Install and Compare Multiple Embeddings with Postgres and PGVector](https://www.youtube.com/watch?v=Bncvggy6m5Q) +- [Background on GGUF Quantization & DRAGON Model Example](https://www.youtube.com/watch?v=ZJyQIZNJ45E) +- [Using LM Studio Models](https://www.youtube.com/watch?v=h2FDjUyvsKE) +- [Using Ollama Models](https://www.youtube.com/watch?v=qITahpVDuV0) +- [Use any GGUF Model](https://www.youtube.com/watch?v=9wXJgld7Yow) - [Use small LLMs for RAG for Contract Analysis (feat. LLMWare)](https://www.youtube.com/watch?v=8aV5p3tErP0) - [Invoice Processing with LLMware](https://www.youtube.com/watch?v=VHZSaBBG-Bo&t=10s) - [Ingest PDFs at Scale](https://www.youtube.com/watch?v=O0adUfrrxi8&t=10s) - [Evaluate LLMs for RAG with LLMWare](https://www.youtube.com/watch?v=s0KWqYg5Buk&t=105s) - [Fast Start to RAG with LLMWare Open Source Library](https://www.youtube.com/watch?v=0naqpH93eEU) - [Use Retrieval Augmented Generation (RAG) without a Database](https://www.youtube.com/watch?v=tAGz6yR14lw) -- [RAG using CPU-based (No-GPU required) Hugging Face Models with LLMWare on your laptop](https://www.youtube.com/watch?v=JjgqOZ2v5oU) - [Pop up LLMWare Inference Server](https://www.youtube.com/watch?v=qiEmLnSRDUA&t=20s) -- [DRAGON-7B-Models](https://www.youtube.com/watch?v=d_u7VaKu6Qk&t=37s) +
-## Data Store Options: +## Data Store Options
Fast Start: use SQLite3 and FAISS out-of-the-box - no install required @@ -606,7 +629,7 @@ LLMWareConfig().set_vector_db("milvus")
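A minimal sketch of the fast-start configuration described above, using the LLMWareConfig setters that appear elsewhere in this README and in the SLIM-Agents examples below - no database server install is needed for SQLite or FAISS:

```python
from llmware.configs import LLMWareConfig

# fast start defaults - file-based, nothing to install
LLMWareConfig().set_active_db("sqlite")    # text collection database
LLMWareConfig().set_vector_db("faiss")     # vector embedding database
```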
-Postgres: use Postgres for both both text collection and vector DB - install with Docker Compose +Postgres: use Postgres for both text collection and vector DB - install with Docker Compose ```bash curl -o docker-compose.yaml https://raw.githubusercontent.com/llmware-ai/llmware/main/docker-compose-pgvector.yaml @@ -623,7 +646,7 @@ LLMWareConfig().set_vector_db("postgres")
Mix-and-Match: LLMWare supports 3 text collection databases (Mongo, Postgres, SQLite) and -7 vector databases (Milvus, PGVector-Postgres, Redis, Mongo-Atlas, Qdrant, Faiss, and Pinecone) +10 vector databases (Milvus, PGVector-Postgres, Neo4j, Redis, Mongo-Atlas, Qdrant, Faiss, LanceDB, ChromaDB and Pinecone) ```bash # scripts to deploy other options @@ -632,10 +655,17 @@ curl -o docker-compose.yaml https://raw.githubusercontent.com/llmware-ai/llmware
+## Meet our Models + +- **SLIM model series:** small, specialized models fine-tuned for function calling and multi-step, multi-model Agent workflows. +- **DRAGON model series:** Production-grade RAG-optimized 6-7B parameter models - "Delivering RAG on ..." the leading foundation base models. +- **BLING model series:** Small CPU-based RAG-optimized, instruct-following 1B-3B parameter models. +- **Industry BERT models:** out-of-the-box custom trained sentence transformer embedding models fine-tuned for the following industries: Insurance, Contracts, Asset Management, SEC. +- **GGUF Quantization:** we provide 'gguf' and 'tool' versions of many SLIM, DRAGON and BLING models, optimized for CPU deployment. -## Accessing LLMs and setting-up API keys & secrets +## Using LLMs and setting-up API keys & secrets -To use LLMWare, you do not need to use any proprietary LLM - we would encourage you to experiment with [BLING](https://huggingface.co/llmware), [DRAGON](https://huggingface.co/llmware), [Industry-BERT](https://huggingface.co/llmware), the GGUF examples, along with bringing in your favorite models from HuggingFace and Sentence Transformers. +LLMWare is an open platform and supports a wide range of open source and proprietary models. To use LLMWare, you do not need to use any proprietary LLM - we would encourage you to experiment with [SLIM](https://www.huggingface.co/llmware/), [BLING](https://huggingface.co/llmware), [DRAGON](https://huggingface.co/llmware), [Industry-BERT](https://huggingface.co/llmware), the GGUF examples, along with bringing in your favorite models from HuggingFace and Sentence Transformers. If you would like to use a proprietary model, you will need to provide your own API Keys. API keys and secrets for models, aws, and pinecone can be set-up for use in environment variables or passed directly to method calls. @@ -658,7 +688,9 @@ git clone git@github.com:llmware-ai/llmware.git - πŸ’‘ Developing small specialized RAG optimized LLMs between 1B-7B parameters - πŸ’‘ Industry-specific LLMs, embedding models and processes to support core knowledge-based use cases - πŸ’‘ Enterprise scalability - containerization, worker deployments and Kubernetes -- πŸ’‘ Integration of SQL and other scale enterprise data sources +- πŸ’‘ Integration of SQL and other scale enterprise data sources +- πŸ’‘ Multi-step, multi-model Agent-based workflows with small, specialized function-calling models + Like our models, we aspire for llmware to be "small, but mighty" - easy to use and get started, but packing a powerful punch! @@ -670,9 +702,26 @@ Questions and discussions are welcome in our [github discussions](https://github ## πŸ“£ Release notes and Change Log -**Supported Operating Systems**: MacOS (Metal and x86), Linux (x86 and aarch64), Windows ** +**Friday, February 16 - v0.2.3 WIP Update** +- Added 10+ embedding models to ModelCatalog - nomic, jina, bge, gte, ember and uae-large. +- Updated OpenAI support >=1.0 and new text-3 embedding models. +- SLIM model keys and output_values now accessible in ModelCatalog. +- Updating encodings to 'utf-8-sig' to better handle txt/csv files with bom. 
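Relating back to the 'Using LLMs and setting-up API keys & secrets' section above, a minimal sketch of the environment-variable route - the variable name is taken from the ChromaDB example later in this document, and the model name and prompt are illustrative only:

```python
import os
from llmware.prompts import Prompt

# proprietary models pick up keys from environment variables
os.environ["USER_MANAGED_OPENAI_API_KEY"] = "<insert-your-openai-key>"

# the key is then used when a proprietary model is loaded, e.g., through a Prompt
prompter = Prompt().load_model("gpt-4")   # illustrative model name - see the ModelCatalog for options
output = prompter.prompt_main("What is the total annual spend?", context="<insert source text>")
```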
+ +**Reported notable issues on priority resolution path** +- stablelm-based models using gguf +- older linux versions with GLIBC < 2.34 +- 3.12 python support - waiting on one last dependency (coming soon) + +**Supported Operating Systems**: MacOS (Metal and x86), Linux (x86 and aarch64), Windows +- note on Linux: we test most extensively on Ubuntu 22 and recommend where possible +- if you need another Linux version, please raise an issue - we will prioritize testing and ensure support. + +**Supported Vector Databases**: Milvus, Postgres (PGVector), Neo4j, Redis, LanceDB, ChromaDB, Qdrant, FAISS, Pinecone, Mongo Atlas Vector Search + +**Supported Text Index Databases**: MongoDB, Postgres, SQLite + -**Supported Vector Databases**: Milvus, Postgres (PGVector), Redis, FAISS, Pinecone, Mongo Atlas Vector Search, FAISS
Optional @@ -686,7 +735,7 @@ Questions and discussions are welcome in our [github discussions](https://github
🚧 Change Log -**Latest Updates - 19 Jan 2024 - Coming soon!: llmware v0.2.0** +**Latest Updates - 19 Jan 2024 - llmware v0.2.0** - Added new database integration options - Postgres and SQLite - Improved status update and parser event logging options for parallelized parsing - Significant enhancements to interactions between Embedding + Text collection databases diff --git a/devcontainer/README new file mode 100644 index 00000000..b4b2e23f --- /dev/null +++ b/devcontainer/README @@ -0,0 +1,4 @@ +If you wish to use devcontainers for development in VS Code, you will need to rename this directory (or create a symlink to it) as .devcontainer and reload your window. This will trigger an option to reopen the code in a container. Once the code is opened in a container, you will be able to contribute as normal without having to install all the dependencies on your local system. Also, the development container provides access to your local home directory via the /code directory. + +quick how-to: +run this command on Linux from the llmware root directory: ln -s devcontainer .devcontainer diff --git a/devcontainer/devcontainer.json new file mode 100644 index 00000000..d71f9d5c --- /dev/null +++ b/devcontainer/devcontainer.json @@ -0,0 +1,34 @@ +{ + "name": "LLMWARE Dev", + //"build": { "dockerfile": "../Dockerfile" }, + "image": "provocoai/llmware:dev-01", + "remoteUser": "${localEnv:USER}", + + + "runArgs": [ + "--name", + "${localWorkspaceFolderBasename}", // Container name + "-it", + "-l", + "com.docker.compose.project=devcontainers" // Container group name + ], + // you can set up your local directory in the devcontainer here. The mount line is an example and mounts your home directory into the /code directory + "mounts" : [ + //"source=${localEnv:HOME},target=/code,type=bind,consistency=cached" + ], + "features": { + "ghcr.io/devcontainers/features/docker-outside-of-docker:1": { + "dockerDashComposeVersion": "v2" + }, + "ghcr.io/devcontainers/features/github-cli:1": {} + }, + "customizations": { + "vscode": { + "extensions": [ + "esbenp.prettier-vscode", // prettify the code extension + "ms-python.python", // python code extension + "ms-python.vscode-pylance" // vscode python extension + ] + } + } +} diff --git a/examples/Embedding/using_chromadb.py new file mode 100644 index 00000000..01178c79 --- /dev/null +++ b/examples/Embedding/using_chromadb.py @@ -0,0 +1,115 @@ + +"""This example shows how to use ChromaDB as a vector embedding database with llmware""" + +""" (A) Python Dependencies - + + As a first step, you should pip install ChromaDB, which is not included in the llmware package: + 1. pip3 install chromadb + + (B) Using ChromaDB - + + Installing ChromaDB via pip installs everything you need. + However, if you need help, there are many great online sources and communities, e.g.,: + -- ChromaDB documentation - https://docs.trychroma.com/ + -- Docker - https://hub.docker.com/u/chromadb + -- please also see the docker-compose-chromadb.yaml script provided in the llmware script repository + + (C) Configurations - + + You can configure ChromaDB with environment variables. Here is the list of variable names we currently + support - for more information see ChromaDBConfig.
+ -- CHROMADB_COLLECTION + -- CHROMADB_PERSISTENT_PATH + -- CHROMADB_HOST + -- CHROMADB_PORT + -- CHROMADB_SSL + -- CHROMADB_HEADERS + -- CHROMADB_SERVER_AUTH_PROVIDER + -- CHROMADB_SERVER_AUTH_CREDENTIALS_PROVIDER + -- CHROMADB_PASSWORD + -- CHROMADB_SERVER_AUTH_CREDENTIALS_FILE + -- CHROMADB_SERVER_AUTH_CREDENTIALS + -- CHROMADB_SERVER_AUTH_TOKEN_TRANSPORT_HEADER +""" + + +import os + +from llmware.setup import Setup +from llmware.library import Library +from llmware.retrieval import Query + +# example of using ChromaDB as an in-memory database +os.environ["CHROMADB_COLLECTION"] = "llmware" + +# note: in default mode, Chroma will persist in memory only - to persist to disk, uncomment the following line and add a local folder path: +# os.environ["CHROMADB_PERSISTENT_PATH"] = "/local/folder/path/to/save/chromadb/" + + +def build_lib (library_name, folder="Agreements"): + + # Step 1 - Create library which is the main 'organizing construct' in llmware + print ("\nupdate: Step 1 - Creating library: {}".format(library_name)) + + library = Library().create_new_library(library_name) + + # Step 2 - Pull down the sample files from S3 through the .load_sample_files() command + # --note: if you need to refresh the sample files, set 'over_write=True' + print ("update: Step 2 - Downloading Sample Files") + + sample_files_path = Setup().load_sample_files(over_write=False) + + # Step 3 - point ".add_files" method to the folder of documents that was just created + # this method parses the documents, text chunks, and indexes them in the text collection database + print("update: Step 3 - Parsing and Text Indexing Files") + + # options: Agreements | UN-Resolutions-500 + library.add_files(input_folder_path=os.path.join(sample_files_path, folder)) + + return library + + +# start script + +print("update: Step 1- starting here- building library- parsing PDFs into text chunks") + +lib = build_lib("chromadb_lib_0") + +# optional - check the status of the library card and embedding +lib_card = lib.get_library_card() +print("update: -- before embedding process - check library card - ", lib_card) + +print("update: Step 2 - starting to install embeddings") + +# alt embedding models - "mini-lm-sbert" | industry-bert-contracts | text-embedding-ada-002 +# note: if you want to use text-embedding-ada-002, you will need an OpenAI key and enter it into the os.environ variable +# e.g., os.environ["USER_MANAGED_OPENAI_API_KEY"] = "" + +# batch sizes from 100-500 usually give good performance and work on most environments +lib.install_new_embedding(embedding_model_name="industry-bert-contracts",vector_db="chromadb",batch_size=300) + +# optional - check the status of the library card and embedding +lib_card = lib.get_library_card() +print("update: -- after embedding process - check updated library card - ", lib_card) + +# run a query +# note: embedding_model_name is optional, but useful if you create multiple embeddings on the same library +# --see other example scripts for multiple embeddings + +# create query object +query_chromadb = Query(lib, embedding_model_name="industry-bert-contracts") + +# run multiple queries using query_chromadb +my_search_results = query_chromadb.semantic_query("What is the sale bonus?", result_count = 24) + +for i, qr in enumerate(my_search_results): + print("update: semantic query results: ", i, qr) + +# if you want to delete the embedding - uncomment the line below +# lib.delete_installed_embedding("industry-bert-contracts", "chromadb") + +# optional - check the embeddings on
the library +emb_record = lib.get_embedding_status() +for j, entries in enumerate(emb_record): + print("update: embeddings on library: ", j, entries) diff --git a/examples/README.md b/examples/README.md index 3bf1d071..f4536517 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,44 +1,24 @@ -# Getting started with `llmware` +# πŸ”₯ Top New Examples πŸ”₯ + +New to LLMWare - [**Fast Start tutorial series**](https://github.com/llmware-ai/llmware/tree/main/fast_start) +SLIM Examples - [**SLIM Models**](SLIM-Agents/) | Example | Detail | |-------------|--------------| -| 1. Getting Started ([code](Getting_Started/getting_started_with_rag.py) / [video](https://www.youtube.com/watch?v=0naqpH93eEU)) | End-to-end Basic RAG Recipe illustrating key LLMWare classes. | -| 2. Prompts ([code](Prompts/llm_prompts.py)) | Prompt LLMs with various sources, explore the out-of-the-box Prompt Catalog, and use different prompt styles.| -| 3. Retrieval ([code](Retrieval/semantic_retrieval.py)) | Explore the breadth of retrieval capabilities and persisting, loading and saving retrieval history.| -| 4. Embedding ([code](Embedding/embeddings_fast_start.py)) | Simple access to multiple embedding models and vector DBs (β€œmix and match”). -| 5. Parsing ([code](Parsing/parse_documents.py)) | Ingest at scale into library and β€˜at runtime' into any Prompt. -| 6. Prompts With Sources ([code](Prompts/prompt_with_sources.py)) | Attach wide range of knowledge sources directly into Prompts. -| 7. BLING models ([code](Models/bling_fast_start.py) / [video](https://www.youtube.com/watch?v=JjgqOZ2v5oU)) | Explore `llmware`'s BLING model series ("Best Little Instruction-following No-GPU-required"). See how they perform in common RAG scenarios - question-answering, key-value extraction, and basic summarization. | -| 8. RAG with BLING ([code](RAG/contract_analysis_on_laptop_with_bling_models.py) / [video](https://www.youtube.com/watch?v=8aV5p3tErP0)) | Using contract analysis as an example, experiment with RAG for complex document analysis and text extraction using `llmware`'s BLING ~1B parameter GPT model running on your laptop. | -| 9. DRAGON RAG benchmark testing with huggingface ([code](Models/dragon_rag_benchmark_tests_huggingface.py)) | Run RAG instruct benchmark tests against the `llmware` DRAGON models to find the best one for your RAG workflow. This example uses basic Transformer APIs. | -| 10. DRAGON RAG benchmark testing with llmware ([code](Models/dragon_rag_benchmark_tests_llmware.py)) | Run RAG instruct benchmark tests against the `llmware` DRAGON models to find the best one for your RAG workflow. This example uses the llmware Prompt API which provides additional capabilities such as evidence/fact checking | -| 11. Fact Checking ([code](Prompts/fact_checking.py)) | Explore the full set of evidence methods in this example script that analyzes a set of contracts. | -| 12. Working with Prompts ([code](Getting_Started/working_with_prompts.py)) | Inspection of Prompt history which is useful in AI Audit scenarios.| -| 13. Hugging Face Integration ([code](Models/huggingface_integration.py)) | How to bring your favorite HF model into llmware seamlessly. Customize a generative model with weights from a custom fine-tuned model. | -| 14. Working with Datasets ([code](Datasets/working_with_datasets.py)) | Dataset generation streamlined for fine-tuning generative and embedding models and formats such as Alpaca, ChatGPT, Human-Bot. | -| 15. 
Working without Databases ([code](Getting_Started/working_without_a_database.py) / [video](https://www.youtube.com/watch?v=tAGz6yR14lw))| Parse, Prompt and generate Datasets from Prompt history without installing MongoDB or a vector database.| -| 16. Working without Databases with a minimal Web UI([code](Getting_Started/ui_without_a_database.py) | Upload pdfs, and run inference on llmware BLING models without installing MongoDB or a vector database.| - - -# Using `llmware` without a database -You can do some interesting things using `llmware` without a database or vector embeddings. Parsing can be done in memory and outputted to text or json. Prompts can be crafted with sources from files, Wikipedia or the Yahoo Finance API. The **Working without Databases** ([code](Getting_Started/working_without_a_database.py) / [video](https://www.youtube.com/watch?v=tAGz6yR14lw)), [LLM Prompts](Getting_Started/working_with_prompts.py), and [Parsing](Parsing/parse_documents.py) examples show scenarios that can be accomplished and throughout the examples are specific methods that do not require MongoDB or embeddings. - -# `llmware` Open Source Models -The `llmware` public model repository has 3 model collections: -- **Industry BERT models:** out-of-the-box custom trained sentence transformer embedding models fine-tuned for the following industries: Insurance, Contracts, Asset Management, SEC. -- **BLING model series:** Small CPU-based RAG-optimized, instruct-following 1B-3B parameter models. -- **DRAGON model series:** Production-grade RAG-optimized 6-7B parameter models - "Delivering RAG on ..." the leading foundation base models. - -These models collections are available at [`llmware` on Hugging Face](https://huggingface.co/llmware). Explore their use in the [Embedding](Embedding/embeddings_fast_start.py), [Hugging Face Integration](Models/huggingface_integration.py),[`llmware` BLING model](Models/bling_fast_start.py), [RAG with BLING](RAG/contract_analysis_on_laptop_with_bling_models.py), and [RAG benchmark testing](Models/dragon_rag_benchmark_tests_llmware.py) examples. - -# Additional `llmware` capabilities -- Create knowledge graphs with a high-powered and fast C-based co-occurrence table matrix builder, the output of which can feed NLP statistics as well as potentially graph databases. Explore the [Knowledge Graph](Datasets/knowledge_graph.py) example. - -- Generate datasets for fine-tuning both generative and embedding models. `llmware` uses sophisticated data-crafting strategies, and leveraging the data captured throughout the system. Explore the [Datasets](Datasets/working_with_datasets.py) example. - -- Library is the simple, flexible, unifying construct in `llmware` to assemble and normalize parsed text chunks, and is linked to both a text search index, and an open platform of embedding models and vector databases. Explore the [Working with Libraries](Getting_Started/working_with_libraries.py) example. - -- The `llmware` parsers follow a consistent 27 key metadata dictionary, so that you can extract the same information from a PDF as a PowerPoint or Text file. The parsers generally extract images, tables, and all available document metadata. There is a complete set of text chunking tools to parse a batch of documents (across multiple formats) and chunk and store in consistent format in a document store. Explore the [Parsing](Parsing/parse_documents.py) example. +| 1. 
BLING models fast start ([code](Models/bling_fast_start.py) / [video](https://www.youtube.com/watch?v=JjgqOZ2v5oU)) | Get started with fast, accurate, CPU-based models - question-answering, key-value extraction, and basic summarization. | +| 2. Parse and Embed 500 PDF Documents ([code](Embedding/docs2vecs_with_milvus-un_resolutions.py)) | End-to-end example for Parsing, Embedding and Querying UN Resolution documents with Milvus | +| 3. Hybrid Retrieval - Semantic + Text ([code](Retrieval/dual_pass_with_custom_filter.py)) | Using 'dual pass' retrieval to combine best of semantic and text search | +| 4. Multiple Embeddings with PG Vector ([code](Embedding/using_multiple_embeddings.py) / [video](https://www.youtube.com/watch?v=Bncvggy6m5Q)) | Comparing Multiple Embedding Models using Postgres / PG Vector | +| 5. DRAGON GGUF Models ([code](Models/dragon_gguf_fast_start.py) / [video](https://www.youtube.com/watch?v=BI1RlaIJcsc&t=130s)) | State-of-the-Art 7B RAG GGUF Models. | +| 6. RAG with BLING ([code](RAG/contract_analysis_on_laptop_with_bling_models.py) / [video](https://www.youtube.com/watch?v=8aV5p3tErP0)) | Using contract analysis as an example, experiment with RAG for complex document analysis and text extraction using `llmware`'s BLING ~1B parameter GPT model running on your laptop. | +| 7. Master Service Agreement Analysis with DRAGON ([code](RAG/msa_processing.py) / [video](https://www.youtube.com/watch?v=Cf-07GBZT68&t=2s)) | Analyzing MSAs using DRAGON YI 6B Model. | +| 8. Streamlit Example ([code](Getting_Started/ui_without_a_database.py)) | Upload pdfs, and run inference on llmware BLING models. | +| 9. Integrating LM Studio ([code](Models/using-open-chat-models.py) / [video](https://www.youtube.com/watch?v=h2FDjUyvsKE&t=101s)) | Integrating LM Studio Models with LLMWare | +| 10. Prompts With Sources ([code](Prompts/prompt_with_sources.py)) | Attach wide range of knowledge sources directly into Prompts. | +| 11. Fact Checking ([code](Prompts/fact_checking.py)) | Explore the full set of evidence methods in this example script that analyzes a set of contracts. | +| 12. Using 7B GGUF Chat Models ([code](Models/chat_models_gguf_fast_start.py)) | Using 4 state of the art 7B chat models in minutes running locally | + + +Check back from time-to-time as we are always updating these examples - especially with new use cases and contributions from the llmware Community! -- All data artifacts are published in standard formats – json, txt files, pytorch_model.bin files, and fully portable and exportable to any platform. diff --git a/examples/SLIM-Agents/README.md b/examples/SLIM-Agents/README.md new file mode 100644 index 00000000..16fa2e25 --- /dev/null +++ b/examples/SLIM-Agents/README.md @@ -0,0 +1,68 @@ + πŸš€ Start Building Multi-Model Agents Locally on a Laptop πŸš€ +=============== + +**What is a SLIM?** + +**SLIMs** are **S**tructured **L**anguage **I**nstruction **M**odels, which are small, specialized 1B parameter LLMs, +finetuned to generate structured outputs (Python dictionaries, JSON and SQL) that can be handled programmatically, and +stacked together in multi-step, multi-model Agent workflows - all running on a local CPU. + +**Check out the Intro videos** +[SLIM Intro Video](https://www.youtube.com/watch?v=cQfdaTcmBpY) + +There are 10 SLIM models, each delivered in two packages - a Pytorch/Huggingface FP16 model, and a +quantized "tool" designed for fast inference on a CPU, using LLMWare's embedded GGUF inference engine. 
In most cases, +we would recommend that you start with the "tools" version of the models. + +**Getting Started** + +We have several ready-to-run examples in this repository: + +| Example | Detail | +|------------------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------------| +| 1. Getting Started with SLIM Models ([code](slims-getting-started.py) / [video](https://www.youtube.com/watch?v=aWZFrTDmMPc&t=196s)) | Install the models and run hello world tests to see the models in action. | +| 2. Getting Started with Function-Calling Agent ([code](agent-llmfx-getting-started.py) / [video](https://www.youtube.com/watch?v=cQfdaTcmBpY)) | Generate a Structured Report with LLMfx | +| 3. Multi-step Complex Analysis with Agent ([code](agent-multistep-analysis.py) / [video](https://www.youtube.com/watch?v=y4WvwHqRR60)) | Delivering Complex Research Analysis with SLIM Agents | +| 4. Document Clustering ([code](document-clustering.py)) | Multi-faceted automated document analysis with Topics, Tags and NER | +| 5. Two-Step NER Retrieval ([code](ner-retrieval.py)) | Using NER to extract a name, and then using it as the basis for a retrieval. | +| 6. Using Sentiment Analysis ([code](sentiment-analysis.py)) | Using sentiment analysis on earnings transcripts and applying an 'if...then' condition | +| 7. Text2SQL - Intro ([code](text2sql-getting-started-1.py)) | Getting Started with SLIM-SQL-TOOL and Basic Text2SQL Inference | +| 8. Text2SQL - E2E ([code](text2sql-end-to-end-2.py)) | End-to-End Natural Language Query to SQL DB Query | +| 9. Text2SQL - MultiStep ([code](text2sql-multistep-example-3.py)) | Extract a customer name using NER and use in a Text2SQL query | + + +For information on all of the SLIM models, check out [LLMWare SLIM Model Collection](https://www.huggingface.co/llmware/).
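Before diving into the examples above, a minimal 'hello world' sketch of loading a SLIM tool and making a function call - this mirrors the pattern in slims-getting-started.py, and the input text is just an illustration:

```python
from llmware.models import ModelCatalog

# download and cache the quantized 'tool' version of a SLIM model, then call it
model = ModelCatalog().load_model("slim-sentiment-tool")
response = model.function_call("This quarter was a disaster, with falling order volume and rising costs.")

# the response is a dictionary with the structured output and usage information
print("sentiment response: ", response)
```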
**Models List** +If you would like more information about any of the SLIM models, please check out their model card: +- ner - extract named entities - [slim-ner](https://www.huggingface.co/llmware/slim-ner) & [slim-ner-tool](https://www.huggingface.co/llmware/slim-ner-tool) +- sentiment - evaluate sentiment - [slim-sentiment](https://www.huggingface.co/llmware/slim-sentiment) & [slim-sentiment-tool](https://www.huggingface.co/llmware/slim-sentiment-tool) +- topics - generate topic - [slim-topics](https://www.huggingface.co/llmware/slim-topics) & [slim-topics-tool](https://www.huggingface.co/llmware/slim-topics-tool) +- ratings - apply 1 (low) - 5 (high) rating - [slim-ratings](https://www.huggingface.co/llmware/slim-ratings) & [slim-ratings-tool](https://www.huggingface.co/llmware/slim-ratings-tool) +- emotions - assess emotions - [slim-emotions](https://www.huggingface.co/llmware/slim-emotions) & [slim-emotions-tool](https://www.huggingface.co/llmware/slim-emotions-tool) +- tags - auto-generate list of tags - [slim-tags](https://www.huggingface.co/llmware/slim-tags) & [slim-tags-tool](https://www.huggingface.co/llmware/slim-tags-tool) +- intent - identify intent - [slim-intent](https://www.huggingface.co/llmware/slim-intent) & [slim-intent-tool](https://www.huggingface.co/llmware/slim-intent-tool) +- category - high-level category - [slim-category](https://www.huggingface.co/llmware/slim-category) & [slim-category-tool](https://www.huggingface.co/llmware/slim-category-tool) +- nli - assess if evidence supports conclusion - [slim-nli](https://www.huggingface.co/llmware/slim-nli) & [slim-nli-tool](https://www.huggingface.co/llmware/slim-nli-tool) +- sql - convert text into sql - [slim-sql](https://www.huggingface.co/llmware/slim-sql) & [slim-sql-tool](https://www.huggingface.co/llmware/slim-sql-tool) + +You may also want to check out these quantized 'answer' tools, which work well in conjunction with SLIMs for question-answer and summarization: +- bling-answer-tool - 1b quantized RAG model - [bling-answer-tool](https://www.huggingface.co/llmware/bling-answer-tool) +- dragon-yi-answer-tool - 6b quantized RAG model - [dragon-yi-answer-tool](https://www.huggingface.co/llmware/dragon-yi-answer-tool) +- dragon-mistral-answer-tool - 7b quantized RAG model - [dragon-mistral-answer-tool](https://www.huggingface.co/llmware/dragon-mistral-answer-tool) +- dragon-llama-answer-tool - 7b quantized RAG model - [dragon-llama-answer-tool](https://www.huggingface.co/llmware/dragon-llama-answer-tool) + + +**Set up** +No special setup for SLIMs is required other than to install llmware >=0.2.2, e.g., `pip3 install llmware`. + +**Platforms:** +- Mac M1, Mac x86, Windows, Linux (Ubuntu 22 preferred) +- RAM: 16 GB minimum +- Python 3.9, 3.10, 3.11 (note: not supported on 3.12 yet) +- llmware >= 0.2.2 version + + +### **Let's get started! πŸš€** + + diff --git a/examples/SLIM-Agents/agent-llmfx-getting-started.py new file mode 100644 index 00000000..c526448a --- /dev/null +++ b/examples/SLIM-Agents/agent-llmfx-getting-started.py @@ -0,0 +1,82 @@ + +""" Using SLIM tools as part of an agent workflow - introducing LLMfx class - this example shows how to: + + 1. Create an agent using the LLMfx class. + 2. Load multiple specialized tools for the agent. + 3. Execute a series of function-calls. + 4. Generate a consolidated automatic dictionary report.
+ +""" + + +from llmware.models import ModelCatalog +from llmware.agents import LLMfx + + +def create_multistep_report(customer_transcript): + + """ Creating a multi-step, multi-model agent workflow """ + + # create an agent using LLMfx class + agent = LLMfx() + + agent.load_work(customer_transcript) + + # load tools individually + agent.load_tool("sentiment") + agent.load_tool("ner") + + # load multiple tools + agent.load_tool_list(["emotions", "topics", "intent", "tags", "ratings", "answer"]) + + # start deploying tools and running various analytics + + # first conduct three 'soft skills' initial assessment using 3 different models + agent.sentiment() + agent.emotions() + agent.intent() + + # alternative way to execute a tool, passing the tool name as a string + agent.exec_function_call("ratings") + + # call multiple tools concurrently + agent.exec_multitool_function_call(["ner","topics","tags"]) + + # the 'answer' tool is a quantized question-answering model - ask an 'inline' question + # the optional 'key' assigns the output to a dictionary key for easy consolidation + agent.answer("What is a short summary?",key="summary") + + # prompting tool to ask a quick question as part of the analytics + response = agent.answer("What is the customer's account number and user name?", key="customer_info") + + # you can 'unload_tool' to release it from memory + agent.unload_tool("ner") + agent.unload_tool("topics") + + # at end of processing, show the report that was automatically aggregated by key + report = agent.show_report() + + # displays a summary of the activity in the process + activity_summary = agent.activity_summary() + + # list of the responses gathered + for i, entries in enumerate(agent.response_list): + print("update: response analysis: ", i, entries) + + output = {"report": report, "activity_summary": activity_summary, "journal": agent.journal} + + return output + + +if __name__ == "__main__": + + # sample customer transcript + + customer_transcript = "My name is Michael Jones, and I am a long-time customer. " \ + "The Mixco product is not working currently, and it is having a negative impact " \ + "on my business, as we can not deliver our products while it is down. " \ + "This is the fourth time that I have called. My account number is 93203, and " \ + "my user name is mjones. Our company is based in Tampa, Florida." + + output = create_multistep_report(customer_transcript) + diff --git a/examples/SLIM-Agents/agent-multistep-analysis.py b/examples/SLIM-Agents/agent-multistep-analysis.py new file mode 100644 index 00000000..41a3cab7 --- /dev/null +++ b/examples/SLIM-Agents/agent-multistep-analysis.py @@ -0,0 +1,102 @@ + +""" This example shows a complex multi-part research analysis. In this example, we will: + + 1. Build a "research" library. + 2. Query the research library to identify topics of interest. + 3. Create an agent with several analytical tools: sentiment, emotions, topic, entities analysis + 4. Pass the results of our query to the agent to conduct multifaceted analysis. + 5. Apply a top-level filter ('sentiment') on the results from the query + 6. For any of the passages with negative sentiment, we will run a follow-up set of analyses. + 7. Finally, we will assemble the follow-up analysis into a list of detailed reports. 
+""" + +import os +import shutil + +from llmware.agents import LLMfx +from llmware.library import Library +from llmware.retrieval import Query +from llmware.configs import LLMWareConfig +from llmware.setup import Setup + + +def multistep_analysis(): + + """ In this example, our objective is to research Microsoft history and rivalry in the 1980s with IBM. """ + + # step 1 - assemble source documents and create library + + print("update: Starting example - agent-multistep-analysis") + + # note: lines 38-49 attempt to automatically pull sample document into local path + # depending upon permissions in your environment, you may need to set up directly + # if you pull down the samples files with Setup().load_sample_files(), in the Books folder, + # you will find the source: "Bill-Gates-Biography.pdf" + # if you have pulled sample documents in the past, then to update to latest: set over_write=True + + print("update: Loading sample files") + + sample_files_path = Setup().load_sample_files(over_write=False) + bill_gates_bio = "Bill-Gates-Biography.pdf" + path_to_bill_gates_bio = os.path.join(sample_files_path, "Books", bill_gates_bio) + + microsoft_folder = os.path.join(LLMWareConfig().get_tmp_path(), "example_microsoft") + + print("update: attempting to create source input folder at path: ", microsoft_folder) + + if not os.path.exists(microsoft_folder): + os.mkdir(microsoft_folder) + os.chmod(microsoft_folder, 0o777) + shutil.copy(path_to_bill_gates_bio,os.path.join(microsoft_folder, bill_gates_bio)) + + # create library + print("update: creating library and parsing source document") + + LLMWareConfig().set_active_db("sqlite") + my_lib = Library().create_new_library("microsoft_history_0210_1") + my_lib.add_files(microsoft_folder) + + # run our first query - "ibm" + query = "ibm" + search_results = Query(my_lib).text_query(query) + print(f"update: executing query to filter to key passages - {query} - results found - {len(search_results)}") + + # create an agent and load several tools that we will be using + agent = LLMfx() + agent.load_tool_list(["sentiment", "emotions", "topic", "tags", "ner", "answer"]) + + # load the search results into the agent's work queue + agent.load_work(search_results) + + while True: + + agent.sentiment() + + if not agent.increment_work_iteration(): + break + + # analyze sections where the sentiment on ibm was negative + follow_up_list = agent.follow_up_list(key="sentiment", value="negative") + + for job_index in follow_up_list: + + # follow-up 'deep dive' on selected text that references ibm negatively + agent.set_work_iteration(job_index) + agent.exec_multitool_function_call(["tags", "emotions", "topics", "ner"]) + agent.answer("What is a brief summary?", key="summary") + + my_report = agent.show_report(follow_up_list) + + activity_summary = agent.activity_summary() + + for entries in my_report: + print("my report entries: ", entries) + + return my_report + + +if __name__ == "__main__": + + multistep_analysis() + + diff --git a/examples/SLIM-Agents/document-clustering.py b/examples/SLIM-Agents/document-clustering.py new file mode 100644 index 00000000..fb3b7460 --- /dev/null +++ b/examples/SLIM-Agents/document-clustering.py @@ -0,0 +1,57 @@ + +"""This example demonstrates the use of LLM function calls to perform document clustering and + automated classification of different parts of a document. 
""" + +from llmware.parsers import Parser +from llmware.agents import LLMfx +from llmware.setup import Setup + +import os + + +def document_clustering_example (): + + samples_fp = Setup().load_sample_files(over_write=True) + agreements_fp = os.path.join(samples_fp, "Agreements") + agreement_files = os.listdir(agreements_fp) + + if len(agreement_files) == 0: + print("something went wrong") + return -1 + + # parsing the first file (could be random) found in the os.listdir in the Agreements sample folder + contract_chunks = Parser().parse_one_pdf(agreements_fp,agreement_files[0]) + + # create a LLMfx object + agent = LLMfx() + + # there are ~65-70 contract_chunks in ~15 page contract - feel free to slice (faster demo), or the whole thing + agent.load_work(contract_chunks[0:5]) + + agent.load_tool_list(["topics","tags", "ner"]) + + while True: + agent.exec_multitool_function_call(["topics", "tags","ner"]) + + if not agent.increment_work_iteration(): + break + + agent.show_report() + + agent.activity_summary() + + # uncomment this to see a full view of all of the responses + """ + for i, entries in enumerate(agent.response_list): + print("response_list: ", i, entries) + """ + + return agent.response_list + + +if __name__ == "__main__": + + analysis= document_clustering_example() + + + diff --git a/examples/SLIM-Agents/ner-retrieval.py b/examples/SLIM-Agents/ner-retrieval.py new file mode 100644 index 00000000..3f86341e --- /dev/null +++ b/examples/SLIM-Agents/ner-retrieval.py @@ -0,0 +1,47 @@ + +""" This example illustrates a common two-step retrieval pattern using a SLIM NER model: + + Step 1: Extract named entity information from a text. In this case, the name of a musician. + Step 2: Use the extracted name information as the basis for a retrieval. In this case, we will use the + extracted named entities to do a lookup in Wikipedia. """ + +from llmware.agents import LLMfx +from llmware.parsers import WikiParser + + +def ner_lookup_retrieval(): + + text = ("The new Miko Marks album is one of the best I have ever heard in a number of years. " + "She is definitely an artist worth exploring further.") + + # create agent + agent = LLMfx() + agent.load_work(text) + agent.load_tool("ner") + named_entities = agent.ner() + ner_dict= named_entities["llm_response"] + + # take named entities found and package into a lookup list + + lookup = [] + for keys, value in ner_dict.items(): + if value: + lookup.append(value) + + for entries in lookup: + + # run a wiki topic query with each of the named entities found + + wiki_info = WikiParser().add_wiki_topic(entries, target_results=1) + + print("update: wiki_info - ", wiki_info) + summary = wiki_info["articles"][0]["summary"] + + print("update: summary - ", summary) + + return 0 + + +if __name__ == "__main__": + + ner_lookup_retrieval() diff --git a/examples/SLIM-Agents/sentiment-analysis.py b/examples/SLIM-Agents/sentiment-analysis.py new file mode 100644 index 00000000..7ea3e509 --- /dev/null +++ b/examples/SLIM-Agents/sentiment-analysis.py @@ -0,0 +1,98 @@ + +""" Sentiment Analysis example - shows how to use the slim-sentiment-tool. In this example, we will: + + 1. Review several summary earnings transcripts, looking to evaluate the overall sentiment as + 'positive', 'negative', or 'neutral' + + 2. Evaluate a single transcript, and apply if...then based on the result and confidence level. + + 3. Run through a list of earnings transcripts with journaling activated to display the multi-step + process on the screen. 
+""" + +from llmware.agents import LLMfx + +earnings_transcripts = [ + "This is one of the best quarters we can remember for the industrial sector with significant growth across the " + "board in new order volume, as well as price increases in excess of inflation. We continue to see very strong " + "demand, especially in Asia and Europe. Accordingly, we remain bullish on the tier 1 suppliers and would be " + "accumulating more stock on any dips. ", + + "Not the worst results, but overall we view as negative signals on the direction of the economy, and the likely " + "short-term trajectory for the telecom sector, and especially larger market leaders, including AT&T, Comcast, and" + "Deutsche Telekom.", + + "This quarter was a disaster for Tesla, with falling order volume, increased costs and supply, and negative " + "guidance for future growth forecasts in 2024 and beyond.", + + "On balance, this was an average result, with earnings in line with expectations and no big surprises to either " + "the positive or the negative." + ] + + +def get_one_sentiment_classification(text): + + """This example shows a basic use to get a sentiment classification and use the output programmatically. """ + + # simple basic use to get the sentiment on a single piece of text + agent = LLMfx(verbose=True) + agent.load_tool("sentiment") + sentiment = agent.sentiment(text) + + # look at the output + print("sentiment: ", sentiment) + for keys, values in sentiment.items(): + print(f"{keys}-{values}") + + # two key attributes of the sentiment output dictionary + sentiment_value = sentiment["llm_response"]["sentiment"] + confidence_level = sentiment["confidence_score"] + + # use the sentiment classification as a 'if...then' decision point in a process + if "positive" in sentiment_value: + print("sentiment is positive .... will take 'positive' analysis path ...", sentiment_value) + + if "positive" in sentiment_value and confidence_level > 0.8: + print("sentiment is positive with high confidence ... ", sentiment_value, confidence_level) + + return sentiment + + +def review_batch_earning_transcripts(): + + """ This example highlights how to review multiple earnings transcripts and iterate through a batch + using the load_work mechanism. 
""" + + agent = LLMfx() + agent.load_tool("sentiment") + + # iterating through a larger list of samples + # note: load_work method is a flexible input mechanism - pass a string, list, dictionary or combination, and + # it will 'package' as iterable units of processing work for the agent + + agent.load_work(earnings_transcripts) + + while True: + output = agent.sentiment() + # print("update: test - output - ", output) + if not agent.increment_work_iteration(): + break + + response_output = agent.response_list + + agent.clear_work() + agent.clear_state() + + return response_output + + +if __name__ == "__main__": + + # first - quick illustration of getting a sentiment classification + # and using in an "if...then" + sentiment = get_one_sentiment_classification(earnings_transcripts[0]) + + # second - iterate thru a batch of transcripts and apply a sentiment classification + # response_output = review_batch_earning_transcripts() + + diff --git a/examples/SLIM-Agents/slims-getting-started.py b/examples/SLIM-Agents/slims-getting-started.py new file mode 100644 index 00000000..ce6d3251 --- /dev/null +++ b/examples/SLIM-Agents/slims-getting-started.py @@ -0,0 +1,158 @@ + +""" Getting Started with SLIM classifier function calling models - this script demonstrates seven + mini examples to get started using SLIMs: + + 1. Discover list of SLIM models. + 2. 'Hello World' first inference with SLIM model. + 3. Models vs. Tools + 4. Download and cache the SLIM tools. + 5. Run automated tests to confirm installation and demonstrate output. + 6. Using with LLMWare Prompts. + 7. Using the new LLMfx class. + +""" + +from llmware.models import ModelCatalog +from llmware.agents import LLMfx +from llmware.prompts import Prompt + + +def step1_discover_and_load_slim_models(): + + """ Discover a list of SLIM tools in the Model Catalog """ + + tools = ModelCatalog().list_llm_tools() + tool_map = ModelCatalog().get_llm_fx_mapping() + + print("\nList of SLIM model tools in the ModelCatalog\n") + + for i, tool in enumerate(tools): + model_card = ModelCatalog().lookup_model_card(tool_map[tool]) + print("update: step1 - slim tools: ", i, tool, model_card) + + return 0 + + +def step2_hello_world_slim(): + + """ SLIM models can be identified in the ModelCatalog like any llmware model. Instead of using + inference method, SLIM models are used with the function_call method that prepares a special prompt + instruction, and takes optional parameters. """ + + print("\n'Hello World' Inference Using SLIM Function call\n") + + # load like any other model anytime + model = ModelCatalog().load_model("slim-ner-tool") + response = model.function_call("Michael Johnson was a famous Olympic sprinter from the U.S. in the early 2000s.") + + print("update: step2 - response: ", response) + print("update: step2 - usage: ", response["usage"]) + + return 0 + + +def step3_models_versus_tools(): + + """ All SLIM models are delivered in two different packages - as a traditional 'model' and as a + quantized 'tool.' In most scenarios, the tool is intended to be used for fast inference. """ + + print("\nSLIMs come packaged as 'models' (pytorch) and 'tools' (gguf)\n") + + model = ModelCatalog().load_model("llmware/slim-ner") + response = model.function_call("Michael Johnson was a famous Olympic sprinter from the U.S. 
in the early 2000s.") + + print("update: step3 - response: ", response) + print("update: step3 - usage: ", response["usage"]) + + return 0 + + +def step4_load_and_cache_slim_tools(): + + """ To cache the SLIM toolkit locally, use .get_llm_toolkit. If you prefer to select specific tools, + then you can pass a tool_list in the method call as shown below. """ + + # get all tools + ModelCatalog().get_llm_toolkit() + + # select specific tools + ModelCatalog().get_llm_toolkit(tool_list=["sentiment", "ner"]) + + return 0 + + +def step5_run_automated_tests(): + + """ Each of these one line commands will locally cache the model and then run a series of tests using + the model to demonstrate its use and confirm that installation locally was successfully. """ + + # running automated tests - see the tools in action + + tools= ["slim-sentiment-tool" , "slim-topics-tool", "slim-ner-tool", "slim-ratings-tool", + "slim-emotions-tool", "slim-intent-tool", "slim-tags-tool", "slim-sql-tool", + "slim-category-tool", "slim-nli-tool"] + + # run tests for one tool + ModelCatalog().tool_test_run("slim-sentiment-tool") + + # run tests for a bunch of tools + for tool in tools: + # excluding sentiment, since ran above as separate test + if tool != "slim-sentiment-tool": + ModelCatalog().tool_test_run(tool) + + return 0 + + +def step6_simple_use_case(): + + """ This illustrates how to run a basic function call inference on a SLIM model used in conjunction with + a LLMWare prompt. """ + + text = ("This is Melinda Wyngardt from Silvertech Ventures. We are extremely unhappy with the delays in closing " + "the loan and are considering whether to cancel and back out of the deal.") + + tags_model = ModelCatalog().load_model("slim-tags-tool") + response = tags_model.function_call(text,get_logits=True) + print("update: step6 - 'tags' response: ", response) + + intent_model = ModelCatalog().load_model("slim-intent-tool") + response2 = intent_model.function_call(text) + print("update: step6 - 'intent' response: ", response2) + + prompter = Prompt().load_model("llmware/bling-tiny-llama-v0") + output = prompter.prompt_main("What is the name of the company?", context=text) + print("update: step6 - 'question/answer' response: ", output) + + return 0 + + +def step7_introducing_llm_fx_class(): + + """ In addition to using SLIM models to 'supplement' primary LLM calls, SLIMs can be orchestrated in a + multi-step, multi-model workflow using the high-level LLMfx() - more examples on LLMfx() are in the next + main example 'agent-llmfx-getting-started.py' """ + + # shift verbose to True to see step-by-step processing on the screen + agent = LLMfx(verbose=False) + agent.load_tool("sentiment") + + text = "That is the worst thing that I have ever heard." + response = agent.exec_function_call("sentiment", text) + + print("update: step 7 - response - ", response) + + return 0 + + +if __name__ == "__main__": + + step1_discover_and_load_slim_models() + step2_hello_world_slim() + step3_models_versus_tools() + step4_load_and_cache_slim_tools() + step5_run_automated_tests() + step6_simple_use_case() + step7_introducing_llm_fx_class() + + diff --git a/examples/SLIM-Agents/text-2-sql-query-db.py b/examples/SLIM-Agents/text-2-sql-query-db.py new file mode 100644 index 00000000..56767ac6 --- /dev/null +++ b/examples/SLIM-Agents/text-2-sql-query-db.py @@ -0,0 +1,182 @@ + +""" This example shows an end-to-end recipe for querying SQL database using only natural language. + + The example shows the following steps: + + 1. 
Loading "slim-sql-tool" and running initial tests to confirm installation. + 2. Generating a SQL table from a sample CSV file included with the slim-sql-tool install. + 3. Asking basic natural language questions: + A. Looks up the table schema + B. Packages the table schema with query + C. Runs inference to convert text into SQL + D. Queries the database with the generated SQL + E. Returns result + 4. 'Two-step' query (starting on line 133) in which a customer name is pulled from a text using NER, and then + the name is 'dynamically' added to a natural language string, which is then converted using text-to-sql + and querying the database. + 5. All work performed on an integrated 'llmware-sqlite-experimental.db' that can be deleted safely anytime + as part of experimentation lifecycle. + +""" + +import os + +from llmware.agents import SQLTables, LLMfx +from llmware.models import ModelCatalog +from llmware.configs import LLMWareConfig + + +def load_slim_sql_tool(): + + """ First step is to install the slim-sql-tool locally """ + + # to cache locally the slim-sql-tool with config and test files + ModelCatalog().get_llm_toolkit(["sql"]) + + # to run tests to confirm correct installation and see the model in action + # note: the test results will include some minor errors - useful to learn how to sharpen prompts + ModelCatalog().tool_test_run("slim-sql-tool") + + return 0 + + +def hello_world_text_2_sql(): + + """ Illustrates a 'hello world' text-2-sql inference as part of an agent process. """ + sample_table_schema = "CREATE TABLE customer_info (customer_name text, account_number integer, annual_spend integer)" + query = "What are the names of all customers with annual spend greater than $1000?" + + agent = LLMfx(verbose=True) + response = agent.sql(query, sample_table_schema) + + print("update: text-2-sql response - ", response) + + return 0 + + +def build_table(fp, fn, table_name): + + """ This is the key method for taking a CSV file from a folder_path (fp), a proposed new table_name, + and creating a new table directly from the CSV. Note: this is useful for rapid prototyping and + experimentation - but should not be used for any serious production purpose. """ + + sql_db = SQLTables(experimental=True) + x = sql_db.create_new_table_from_csv(fp,fn,table_name=table_name) + print("update: successfully created new db table") + + return 1 + + +def delete_table(table_name): + + """ Start fresh in testing - delete table in experimental local SQLite DB """ + sql_db = SQLTables(experimental=True) + sql_db.delete_table(table_name,confirm_delete=True) + + return True + + +def delete_db(): + + """ Start fresh in testing - deletes SQLite DB and starts over. """ + + sql_db = SQLTables(experimental=True) + sql_db.delete_experimental_db(confirm_delete=True) + + return True + + +def sql_e2e_test_script(table_name="customers1",create_new_table=False): + + """ This is the end-to-end execution script. 
""" + + # create table if needed to set up + if create_new_table: + + sql_tool_repo_path = os.path.join(LLMWareConfig().get_model_repo_path(), "slim-sql-tool") + + if not os.path.exists(sql_tool_repo_path): + ModelCatalog().load_model("llmware/slim-sql-tool") + + files = os.listdir(sql_tool_repo_path) + + csv_file = "customer_table.csv" + + if csv_file in files: + build_table(sql_tool_repo_path, csv_file, table_name) + else: + print("something has gone wrong - could not find customer_table.csv with slim-sql-tool file package") + + # query starts here + agent = LLMfx() + agent.load_tool("sql") + + # Example 1 - direct query + + query_list = ["Which customers are vip customers?", + "What is the highest annual spend of any customer?", + "Which customer has account number 1234953", + "Which customer has the lowest annual spend?", + "Is Susan Soinsin a vip customer?"] + + for i, query in enumerate(query_list): + + # this method is doing all of the work + # -- looks up the table schema in the db using the table_name + # -- packages the text-2-sql query prompt + # -- executes sql method to convert the prompt into a sql query + # -- attempts to execute the sql query on the db + # -- returns the db results as 'research' output + + response = agent.query_db(query, table=table_name) + + # Example 2 - use in a chain of inferences + + text = ("This is Susan Soinsin calling - I am really upset about the poor customer service, " + "and would like to cancel my service.") + + agent.load_tool("ner") + response = agent.ner(text=text) + customer_name = "No Customer" + + # please note: this is just a demo recipe - any real life scenario would require significant preprocessing + # and error checking. :) + + if "llm_response" in response: + if "people" in response["llm_response"]: + people = response["llm_response"]["people"] + if len(people) > 0: + customer_name = people[0] + + print("ner response: ", customer_name, response) + + # e.g., name = "Susan Soinsin" + + query = f"Is {customer_name} a vip customer?" + + print("query: ", query) + + response = agent.query_db(query, table=table_name) + + print("response: ", response) + + for x in range(0,len(agent.research_list)): + print("research: ", x, agent.research_list[x]) + + return 0 + + +if __name__ == "__main__": + + # first - load and test the tools + load_slim_sql_tool() + + # second - 'hello world' demo of using text2sql model + hello_world_text_2_sql() + + # second - run an end-to-end test + sql_e2e_test_script(table_name="customer1",create_new_table=True) + + # third - delete and start fresh for further testing + delete_table("customer1") + diff --git a/examples/SLIM-Agents/text2sql-end-to-end-2.py b/examples/SLIM-Agents/text2sql-end-to-end-2.py new file mode 100644 index 00000000..af6a5401 --- /dev/null +++ b/examples/SLIM-Agents/text2sql-end-to-end-2.py @@ -0,0 +1,111 @@ + + +""" This example shows an end-to-end recipe for querying SQL database using only natural language. + + The example shows the following steps: + + 1. Loading "slim-sql-tool" and running initial tests to confirm installation. + 2. Generating a SQL table from a sample CSV file included with the slim-sql-tool install. + 3. Asking basic natural language questions: + A. Looks up the table schema + B. Packages the table schema with query + C. Runs inference to convert text into SQL + D. Queries the database with the generated SQL + E. Returns result + 3. 
All work performed on an integrated 'llmware-sqlite-experimental.db' that can be deleted safely anytime + as part of experimentation lifecycle. + +""" + +import os + +from llmware.agents import SQLTables, LLMfx +from llmware.models import ModelCatalog +from llmware.configs import LLMWareConfig + + +def sql_e2e_test_script(table_name="customers1",create_new_table=False): + + """ This is the end-to-end execution script. """ + + # create table if needed to set up + if create_new_table: + + # looks to pull sample csv 'customer_table.csv' from slim-sql-tool model package files + sql_tool_repo_path = os.path.join(LLMWareConfig().get_model_repo_path(), "slim-sql-tool") + + if not os.path.exists(sql_tool_repo_path): + ModelCatalog().load_model("llmware/slim-sql-tool") + + files = os.listdir(sql_tool_repo_path) + csv_file = "customer_table.csv" + + if csv_file in files: + + # to create a testing table from a csv + sql_db = SQLTables(experimental=True) + sql_db.create_new_table_from_csv(sql_tool_repo_path, csv_file, table_name=table_name) + # end - creating table + + print("update: successfully created new db table") + else: + print("something has gone wrong - could not find customer_table.csv inside the slim-sql-tool file package") + + # query starts here + agent = LLMfx() + agent.load_tool("sql") + + # Pass direct queries to the DB + + query_list = ["Which customers are vip customers?", + "What is the highest annual spend of any customer?", + "Which customer has account number 1234953", + "Which customer has the lowest annual spend?", + "Is Susan Soinsin a vip customer?"] + + for i, query in enumerate(query_list): + + # query_db method is doing all of the work + # -- looks up the table schema in the db using the table_name + # -- packages the text-2-sql query prompt + # -- executes sql method to convert the prompt into a sql query + # -- attempts to execute the sql query on the db + # -- returns the db results as 'research' output + + response = agent.query_db(query, table=table_name) + + for x in range(0,len(agent.research_list)): + print("research: ", x, agent.research_list[x]) + + return 0 + +def delete_table(table_name): + + """ Start fresh in testing - delete table in experimental local SQLite DB """ + + sql_db = SQLTables(experimental=True) + sql_db.delete_table(table_name, confirm_delete=True) + + return True + + +def delete_db(): + + """ Start fresh in testing - deletes SQLite DB and starts over. """ + + sql_db = SQLTables(experimental=True) + sql_db.delete_experimental_db(confirm_delete=True) + + return True + + +if __name__ == "__main__": + + ModelCatalog().get_llm_toolkit() + + # run an end-to-end test + sql_e2e_test_script(table_name="customer1",create_new_table=True) + + # third - delete and start fresh for further testing + delete_table("customer1") + diff --git a/examples/SLIM-Agents/text2sql-getting-started-1.py b/examples/SLIM-Agents/text2sql-getting-started-1.py new file mode 100644 index 00000000..635640cd --- /dev/null +++ b/examples/SLIM-Agents/text2sql-getting-started-1.py @@ -0,0 +1,53 @@ + +""" This 'getting started' example shows the basics of how to start using text2sql model: + + 1. Loading "slim-sql-tool" and running initial tests to confirm installation. + + 2. 
'Hello World' demonstration of how to 'package' a text2sql prompt combining a + natural language query with a SQL table schema and run a basic inference to generate SQL output + +""" + + +from llmware.agents import LLMfx +from llmware.models import ModelCatalog + + +def load_slim_sql_tool(): + + """ First step is to install the slim-sql-tool locally """ + + # to cache locally the slim-sql-tool with config and test files + ModelCatalog().get_llm_toolkit(["sql"]) + + # to run tests to confirm correct installation and see the model in action + # note: the test results will include some minor errors - useful to learn how to sharpen prompts + ModelCatalog().tool_test_run("slim-sql-tool") + + return 0 + + +def hello_world_text_2_sql(): + + """ Illustrates a 'hello world' text-2-sql inference as part of an agent process. """ + + sample_table_schema = "CREATE TABLE customer_info (customer_name text, account_number integer, annual_spend integer)" + + query = "What are the names of all customers with annual spend greater than $1000?" + + agent = LLMfx(verbose=True) + response = agent.sql(query, sample_table_schema) + + print("update: text-2-sql response - ", response) + + return response + + +if __name__ == "__main__": + + # first - load and test the tools + load_slim_sql_tool() + + # second - 'hello world' demo of using text2sql model + hello_world_text_2_sql() + diff --git a/examples/SLIM-Agents/text2sql-multistep-example-3.py b/examples/SLIM-Agents/text2sql-multistep-example-3.py new file mode 100644 index 00000000..0a25478b --- /dev/null +++ b/examples/SLIM-Agents/text2sql-multistep-example-3.py @@ -0,0 +1,115 @@ + + +""" This example shows a multi-step SQL query use case - this is an 'innovation scenario' and should be viewed +as a good starting recipe for building your own more complex workflows involving text2sql queries. + + The example shows the following steps: + + 1. Generating a SQL table from a sample CSV file included with the slim-sql-tool install. + 2. 'Two-step' query (starting on line 133) in which a customer name is pulled from a text using NER, and then + the name is 'dynamically' added to a natural language string, which is then converted using text-to-sql + and querying the database. + 3. All work performed on an integrated 'llmware-sqlite-experimental.db' that can be deleted safely anytime + as part of experimentation lifecycle. + +""" + +import os + +from llmware.agents import SQLTables, LLMfx +from llmware.models import ModelCatalog +from llmware.configs import LLMWareConfig + +llmware_path = LLMWareConfig().get_llmware_path() + + +def sql_two_step_query_example(table_name="customers1",create_new_table=False): + + """ This is the end-to-end execution script. 
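In sketch form, the two-step pattern implemented below (names match the code in this function):

        response = agent.ner(text=text)                        # step 1 - extract the customer name
        query = f"Is {customer_name} a vip customer?"          # step 2 - build a natural language query
        response = agent.query_db(query, table=table_name)     # text-to-sql conversion + db lookup
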
""" + + # create table if needed to set up + if create_new_table: + + sql_tool_repo_path = os.path.join(LLMWareConfig().get_model_repo_path(), "slim-sql-tool") + + if not os.path.exists(sql_tool_repo_path): + ModelCatalog().load_model("llmware/slim-sql-tool") + + files = os.listdir(sql_tool_repo_path) + + csv_file = "customer_table.csv" + + if csv_file in files: + sql_db = SQLTables(experimental=True) + sql_db.create_new_table_from_csv(sql_tool_repo_path, csv_file, table_name=table_name) + print("update: successfully created new db table") + + else: + print("something has gone wrong - could not find customer_table.csv with slim-sql-tool file package") + + # query starts here + agent = LLMfx() + agent.load_tool("sql") + agent.load_tool("ner") + + # Multi-step example - extract NER -> create natural language query -> convert SQL -> lookup + + text = ("This is Susan Soinsin calling - I am really upset about the poor customer service, " + "and would like to cancel my service.") + + # Step 1 - extract the customer name using NER + response = agent.ner(text=text) + customer_name = "No Customer" + + # please note: this is just a demo recipe - any real life scenario would require significant preprocessing + # and error checking. :) + + if "llm_response" in response: + if "people" in response["llm_response"]: + people = response["llm_response"]["people"] + if len(people) > 0: + customer_name = people[0] + + print("update: ner response - identified the following people names - ", customer_name, response) + + # Step 2 - use the customer name found in the NER analysis to construct a natural language query + query = f"Is {customer_name} a vip customer?" + + print("update: dynamically created query: ", query) + + response = agent.query_db(query, table=table_name) + + print("update: response: ", response) + + for x in range(0,len(agent.research_list)): + print("research: ", x, agent.research_list[x]) + + return 0 + +def delete_table(table_name): + + """ Start fresh in testing - delete table in experimental local SQLite DB """ + sql_db = SQLTables(experimental=True) + sql_db.delete_table(table_name,confirm_delete=True) + + return True + + +def delete_db(): + + """ Start fresh in testing - deletes SQLite DB and starts over. 
""" + + sql_db = SQLTables(experimental=True) + sql_db.delete_experimental_db(confirm_delete=True) + + return True + + +if __name__ == "__main__": + + # second - run an end-to-end test + sql_two_step_query_example (table_name="customer1",create_new_table=True) + + # third - delete and start fresh for further testing + delete_table("customer1") + diff --git a/fast_start/example-4-rag-text-query.py b/fast_start/example-4-rag-text-query.py index d1576330..3338954a 100644 --- a/fast_start/example-4-rag-text-query.py +++ b/fast_start/example-4-rag-text-query.py @@ -44,41 +44,49 @@ def example_4a_contract_analysis_from_library (model_name, verbose=False): print (f"\n > Loading model {model_name}...") q = Query(contracts_lib) + + # get a list of all of the unique documents in the library + + # doc id list + doc_list = q.list_doc_id() + print("update: document id list - ", doc_list) + + # filename list + fn_list = q.list_doc_fn() + print("update: filename list - ", fn_list) + prompter = Prompt().load_model(model_name) - for i, contract in enumerate(os.listdir(contracts_path)): + for i, doc_id in enumerate(doc_list): - # exclude potential mac os created file artifact in the samples folder path - if contract != ".DS_Store": - - print("\nAnalyzing contract: ", str(i+1), contract) + print("\nAnalyzing contract: ", str(i+1), doc_id, fn_list[i]) - print("LLM Responses:") - - for question in question_list: + print("LLM Responses:") - query_topic = question["topic"] - llm_question = question["llm_query"] + for question in question_list: - doc_filter = {"file_source": [contract]} - query_results = q.text_query_with_document_filter(query_topic,doc_filter,result_count=5,exact_mode=True) + query_topic = question["topic"] + llm_question = question["llm_query"] - if verbose: - # this will display the query results from the query above - for j, qr in enumerate(query_results): - print("update: querying document - ", query_topic, j, doc_filter, qr) + doc_filter = {"doc_ID": [doc_id]} + query_results = q.text_query_with_document_filter(query_topic,doc_filter,result_count=5,exact_mode=True) - source = prompter.add_source_query_results(query_results) + if verbose: + # this will display the query results from the query above + for j, qr in enumerate(query_results): + print("update: querying document - ", query_topic, j, doc_filter, qr) - # *** this is the call to the llm with the source packaged in the context automatically *** - responses = prompter.prompt_with_source(llm_question, prompt_name="default_with_context", temperature=0.3) + source = prompter.add_source_query_results(query_results) - # unpacking the results from the LLM - for r, response in enumerate(responses): - print("update: llm response - ", llm_question, re.sub("[\n]"," ", response["llm_response"]).strip()) + # *** this is the call to the llm with the source packaged in the context automatically *** + responses = prompter.prompt_with_source(llm_question, prompt_name="default_with_context", temperature=0.3) - # We're done with this contract, clear the source from the prompt - prompter.clear_source_materials() + # unpacking the results from the LLM + for r, response in enumerate(responses): + print("update: llm response - ", llm_question, re.sub("[\n]"," ", response["llm_response"]).strip()) + + # We're done with this contract, clear the source from the prompt + prompter.clear_source_materials() # Save jsonl report to jsonl to /prompt_history folder print("\nPrompt state saved at: ", os.path.join(LLMWareConfig.get_prompt_path(),prompter.prompt_id)) 
diff --git a/fast_start/example-5-rag-semantic-query.py b/fast_start/example-5-rag-semantic-query.py index 7bd3e447..c09c9d6d 100644 --- a/fast_start/example-5-rag-semantic-query.py +++ b/fast_start/example-5-rag-semantic-query.py @@ -78,7 +78,12 @@ def semantic_rag (library_name, embedding_model_name, llm_model_name): # we will look through the list of semantic query results, and pull the top results for each file for j, entries in enumerate(results): - if entries["file_source"] == contract: + library_fn = entries["file_source"] + if os.sep in library_fn: + # handles difference in windows file formats vs. mac / linux + library_fn = library_fn.split(os.sep)[-1] + + if library_fn == contract: print("Top Retrieval: ", j, entries["distance"], entries["text"]) qr.append(entries) diff --git a/fast_start/example-6-rag-multi-step-query.py b/fast_start/example-6-rag-multi-step-query.py index 24287e4c..40f7ee17 100644 --- a/fast_start/example-6-rag-multi-step-query.py +++ b/fast_start/example-6-rag-multi-step-query.py @@ -48,26 +48,33 @@ def msa_processing(library_name, llm_model_name): # results_only = False will return a dictionary with 4 keys: {"query", "results", "doc_ID", "file_source"} msa_docs = results["file_source"] + msa_doc_ids = results["doc_ID"] # load prompt/llm locally prompter = Prompt().load_model(llm_model_name) - print("update: identified the following msa docs: ", msa_docs) + print("update: identified the following msa doc id: ", msa_doc_ids) # analyze each MSA - "query" & "llm prompt" - for i, docs in enumerate(msa_docs): + for i, doc_id in enumerate(msa_doc_ids): print("\n") - print (i+1, "Reviewing MSA - ", docs) + docs = msa_docs[i] + if os.sep in docs: + # handles difference in windows file formats vs. Mac/Linux + docs = docs.split(os.sep)[-1] + + print (i+1, "Reviewing MSA - ", doc_id, docs) # look for the termination provisions in each document - doc_filter = {"file_source": [docs]} + doc_filter = {"doc_ID": [doc_id]} termination_provisions = q.text_query_with_document_filter("termination", doc_filter) # package the provisions as a source to a prompt sources = prompter.add_source_query_results(termination_provisions) - print("update: sources - ", sources) + # if you want to see more details about how the sources are packaged: uncomment this line- + # print("update: sources - ", sources) # call the LLM and ask our question response = prompter.prompt_with_source("What is the notice for termination for convenience?") diff --git a/llmware/__init__.py b/llmware/__init__.py index 0b049f52..ed695d39 100644 --- a/llmware/__init__.py +++ b/llmware/__init__.py @@ -11,7 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The llmware package provides tools to build large language model (LLM) applications, this includes +a custom set of LLMs that are small and open source. -__version__ = '0.2.0' +The llmware package aspires to be a middleware in LLM applications. In other words, it provides the +infrastructure between the components, such as the models, the prompts, the text databases, and +the vector databases. 
+""" + + +__version__ = '0.2.3' __author__ = 'llmware' __license__ = 'Apache 2.0 License' diff --git a/llmware/agents.py b/llmware/agents.py new file mode 100644 index 00000000..0dcfeb01 --- /dev/null +++ b/llmware/agents.py @@ -0,0 +1,1250 @@ +"""The agents module implements the two classes LLMfx and SQLTables, where LLMfx manages +Structured Language Instruction Models (SLIMs), the agents and SQLTables handels +creating and accessing external SQL data. LLmfx currently only supports SLIM models, other model +classes will be added over time. And SQLTables is an experimantal feature for creating and accessing SQLite. + +A Structured Language Instruction Model, SLIM for short, is a small specialized multi-modal LLM for function +calling and multi-step workflows. +""" + +import shutil +import logging +import gc +import re +import csv +import os +import sqlite3 + +from llmware.models import ModelCatalog, _ModelRegistry +from llmware.util import CorpTokenizer +from llmware.configs import SQLiteConfig +from llmware.exceptions import ModelNotFoundException + + +class LLMfx: + + """ LLMfx provides a high-level orchestration abstraction that implements multi-model, multi-step processes + with the ability to load and orchestrate multiple SLIM classifier models as 'tools' with centralized journaling, + structured work management and information aggregation. The initial implementation of LLMfx is designed + to support SLIM classifier models, with support for additional model classes to come over time. """ + + def __init__(self, api_key=None, verbose=True, analyze_mode=True): + + if verbose: + print("update: Launching LLMfx process") + + self._supported_tools = _ModelRegistry().get_llm_fx_tools_list() + self._default_tool_map = _ModelRegistry().get_llm_fx_mapping() + + for tools in self._supported_tools: + setattr(self, tools + "_model", None) + + self.api_key = api_key + + self.work_queue = [] + self.work_iteration = 0 + + self.verbose = verbose + self.analyze_mode = analyze_mode + + # report is a list of dictionaries, with each dictionary linked to a work item number + # reports are automatically aggregated through the lifecycle of the object + self.report = [] + + # response list provides a list of the llm tool responses + self.response_list = [] + + # research list provides a list of any research gathered (specifically from SQLTables currently) + self.research_list = [] + + # journal keeps a running journal output used in 'verbose' mode to the screen display + self.journal = [] + self.step = 0 + + journal_update = f"creating object - ready to start processing." + self.write_to_journal(journal_update) + + self.tools_deployed = [] + self.inference_calls = 0 + + def update_tool_map(self, tool_type, tool_name): + + """ Updates tool mapping for LLMfx instance - enables swapping in other models. """ + + if tool_type: + if tool_type in self._supported_tools: + + # unload tool if currently being used + self.unload_tool(tool_type) + + # create new mapping + self._default_tool_map.update({tool_type: tool_name}) + + # load new tool + self.load_tool(tool_type) + + return self + + def clear_work(self): + + """ Detaches any loaded text work and resets the iteration number. """ + + self.work_queue = [] + self.work_iteration = 0 + + journal_update = f"clearing work queue - reset" + self.write_to_journal(journal_update) + + return True + + def set_work_iteration(self, num): + + """ Sets the work iteration number. 
""" + + if num < len(self.work_queue): + self.work_iteration = num + + journal_update = f"setting work iteration to entry - {str(num)}" + self.write_to_journal(journal_update) + + return True + + def top_of_work_queue(self): + + """ Sets the work iteration number to the last item in the work queue and returns this value. """ + + self.work_iteration = len(self.work_queue) - 1 + return self.work_iteration + + def increment_work_iteration(self): + + """ Increments the work iteration - will return None if nothing left in the processing queue. """ + + if (self.work_iteration + 1) < len(self.work_queue): + self.work_iteration += 1 + output_value = self.work_iteration + journal_update = f"incrementing work iteration to entry - {str(self.work_iteration)}" + else: + journal_update = f"completed all work processing" + output_value = None + + self.write_to_journal(journal_update) + + return output_value + + def _expand_report(self): + + """ Creates an incremental empty report dictionary in line with creation of a new work item. """ + + self.report.append({}) + return len(self.report) + + def load_work(self, text, text_key="text"): + + """ Flexible intake method accepts multiple forms of input text: + --if string, then packages as a dictionary, and adds to the work_queue + --if dictionary, then checks the keys and adds to the work_queue + --if list, then unpacks and iterates, adding each entry as a dictionary onto the work queue """ + + new_entries_created = 0 + + if isinstance(text, str): + new_entry = {"text": text, "file_source": "NA", "page_num": "NA"} + self.work_queue.append(new_entry) + new_entries_created += 1 + self._expand_report() + + if isinstance(text, dict): + if text_key in text and "file_source" in text and "page_num" in text: + self.work_queue.append(text) + new_entries_created += 1 + self._expand_report() + else: + if text_key not in text: + logging.warning("could not identify dictionary type.") + return -1 + else: + if "file_source" not in text: + text.update({"file_source": "NA"}) + if "page_num" not in text: + text.update({"page_num": "NA"}) + self.work_queue.append(text) + new_entries_created += 1 + self._expand_report() + + if isinstance(text, list): + # need to check the type of the entries in the list + for i, elements in enumerate(text): + + if isinstance(elements, str): + new_entry = {"text": elements, "file_source": "NA", "page_num": "NA"} + self.work_queue.append(new_entry) + new_entries_created += 1 + self._expand_report() + + if isinstance(elements, dict): + if text_key in elements and "file_source" in elements and "page_num" in elements: + self.work_queue.append(elements) + new_entries_created += 1 + self._expand_report() + else: + if text_key not in elements: + logging.warning("update: load - skipping - could not identify " + "dictionary type - %s", elements) + else: + if "file_source" not in elements: + elements.update({"file_source": "NA"}) + if "page_num" not in elements: + elements.update({"page_num": "NA"}) + self.work_queue.append(elements) + new_entries_created += 1 + self._expand_report() + + journal_update = f"loading new processing text - {str(new_entries_created)} new entries" + self.write_to_journal(journal_update) + + return self.work_queue + + def clear_state(self): + + """ Resets key state variables of LLMfx instance """ + + self.journal = [] + self.tools_deployed = [] + self.inference_calls = 0 + self.response_list = [] + # self.report = {} + self.report = [] + self.step = 0 + + return self + + def activity_summary(self): + + """ Provides an activity 
summary and writes to journal. """ + + activity_summary = {"inference_count": self.inference_calls, "tools_used": len(self.tools_deployed), + "tools": self.tools_deployed} + + journal_update = f"generating activity_summary - {str(activity_summary)}" + self.write_to_journal(journal_update) + + return activity_summary + + def show_report(self, iteration_num=None,add_source=True): + + """ Shows the gathered report so far, and writes to journal. """ + + output_report = [] + + if iteration_num: + + if not isinstance(iteration_num,list): + iteration_num = [iteration_num] + + # show specific report(s) + journal_update = f"showing selected reports - {str(iteration_num)}\n" + for n in iteration_num: + journal_update += f"showing gathered report - {str(self.report[n])}\n" + for key, value in self.report[n].items(): + journal_update += f"\t\t\t\t -- {key.ljust(20)} - {str(value).ljust(40)}\n" + + source_info = "" + if "file_source" in self.work_queue[n]: + source_info += self.work_queue[n]["file_source"] + if "page_num" in self.work_queue[n]: + source_info += " - page: " + str(self.work_queue[n]["page_num"]) + + key= "source_info" + value = source_info + if source_info: + journal_update += f"\t\t\t\t -- {key.ljust(20)} - {str(value).ljust(40)}\n" + + base_report = self.report[n] + if add_source: + base_report.update({"source": self.work_queue[n]}) + + output_report.append(base_report) + + self.write_to_journal(journal_update) + + else: + # show all reports + output_report = [] + journal_update = f"showing all gathered reports - {str(self.report)}\n" + for i, entries in enumerate(self.report): + journal_update += f"report - {str(i)} - {str(self.report[i])}\n" + for key, value in self.report[i].items(): + journal_update += f"\t\t\t\t -- {key.ljust(20)} - {str(value).ljust(40)}\n" + if add_source: + entries.update({"source": self.work_queue[i]}) + output_report.append(entries) + self.write_to_journal(journal_update) + # output_report = self.report + + return output_report + + def lookup_response_by_tool(self, tool_type): + + """ Looks up an item in the response list by tool type. 
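For example (illustrative): sentiment_responses = agent.lookup_response_by_tool("sentiment")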
""" + + output = [] + + for i, response in enumerate(self.response_list): + if response["tool"] == tool_type: + output.append(response) + + return output + + def follow_up_list(self, key=None, value=None): + + """ Analyzes response list and returns sub-set with matching 'key' and 'value' """ + + follow_up_list = [] + + if not key: + journal_update = f"building follow-up_list - looking for distinct work items\n" + else: + journal_update = f"building follow_up_list - looking for {key} - {value}\n" + + key_value_str = f"{key} - {value}" + + for i, response in enumerate(self.response_list): + if "llm_response" in response: + + work_num = response["work_iteration"] + text = response["text"] + + if key: + if key in response["llm_response"]: + if value in response["llm_response"][key]: + follow_up_list.append(work_num) + + journal_update += f"\t\t\t\t -- {key_value_str.ljust(20)} - {str(work_num)} - {str(text)}\n" + + else: + if work_num not in follow_up_list: + follow_up_list.append(work_num) + placeholder = "distinct_work_item" + journal_update += f"\t\t\t\t -- {placeholder.ljust(20)} - {str(work_num)} - {str(text)}\n" + + self.write_to_journal(journal_update) + + return follow_up_list + + def analyze_responses(self, key,value): + + """ Analyzes response list and returns sub-set with matching 'key' and 'value' """ + + journal_update = f"analyzing responses - looking for {key} - {value}\n" + + output_list = [] + key_value_str = f"{key} - {value}" + + for i,response in enumerate(self.response_list): + if "llm_response" in response: + if key in response["llm_response"]: + if value in response["llm_response"][key]: + output_list.append(response) + + cl = response["confidence_score"] + text = response["work_item"]["text"] + step = response["step"] + + journal_update += f"\t\t\t\t -- {key_value_str.ljust(20)} - {str(step)} - {str(text)}\n" + + self.write_to_journal(journal_update) + + return output_list + + def load_tool(self, tool_type): + + """ Loads a single tool """ + + model = None + if tool_type in self._supported_tools: + + journal_update = f"loading tool - {tool_type}" + self.write_to_journal(journal_update) + + setattr(self, tool_type + "_model", + ModelCatalog().load_model(self._default_tool_map[tool_type],api_key=self.api_key)) + + model = getattr(self, tool_type + "_model") + + if tool_type not in self.tools_deployed: + self.tools_deployed.append(tool_type) + + return model + + def load_tool_list(self, tool_list): + + """ Loads a list of tool, typically at the start of a multi-step process. """ + + for tool_type in tool_list: + + if tool_type in self._supported_tools: + + model = getattr(self, tool_type + "_model") + + if not model: + self.load_tool(tool_type) + + return self + + def unload_tool(self, tool_type): + + """ Unloads a tool, which removes it from memory - useful in long-running processes + to be able to load and unload different tools. """ + + if tool_type in self._supported_tools: + + journal_update = f"unloading tool - {tool_type}" + self.write_to_journal(journal_update) + + model = getattr(self, tool_type + "_model") + model.unload_model() + + delattr(self, tool_type + "_model") + setattr(self, tool_type + "_model", None) + gc.collect() + + return 0 + + def write_to_journal(self, journal_update): + + """ Adds an event to the running journal list and displays if in verbose mode. 
""" + + self.journal.append(journal_update) + self.step += 1 + + if self.verbose: + print(f"step - \t{str(self.step)} - \t{journal_update}") + + return True + + def exec_function_call(self, tool_type, text=None, function="classify", params=None, get_logits=True): + + """ Executes a function call on the selected tool type. """ + + value_output = {} + + if tool_type in self._supported_tools: + + model = getattr(self, tool_type + "_model") + + # if model not yet loaded, then load in-line + if not model: + model = self.load_tool(tool_type) + + function_call = getattr(model, "function_call") + + journal_update = f"executing function call - deploying - {tool_type} " + self.write_to_journal(journal_update) + + if text: + # if text passed directly, then add to work queue + self.load_work(text) + # set work iteration to be the last item + self.top_of_work_queue() + + # pull from the work queue + work_dict = self.work_queue[self.work_iteration] + work_iter = self.work_iteration + text = work_dict["text"] + + if not self.analyze_mode: + get_logits = False + + response = function_call(text, function=function, params=params, get_logits=get_logits) + + self.inference_calls += 1 + output_response = {} + logit_analysis = {} + + if response: + + if "llm_response" in response: + + llm_response = response["llm_response"] + output_type = response["usage"]["type"] + usage= response["usage"] + + if response["usage"]["type"] == "dict": + dict_output = True + self.report[work_iter] = self.report[work_iter] | response["llm_response"] + + else: + logging.warning("update: could not automatically convert to dictionary - " + "keeping as string output") + dict_output = False + + # assemble output + value_output.update({"llm_response": llm_response,"dict_output": dict_output}) + + # start journaling update + journal_update = f"executing function call - " \ + f"getting response - {tool_type}\n" + journal_update += f"\t\t\t\t -- llm_response - {str(llm_response)}\n" + journal_update += f"\t\t\t\t -- output type - {output_type}\n" + journal_update += f"\t\t\t\t -- usage - {usage}" + + self.write_to_journal(journal_update) + # end journaling + + # default - if not found/applied + confidence_score = -1 + + if get_logits: + logit_analysis = ModelCatalog().logit_analysis(response, model.model_card, + model.hf_tokenizer_name, + api_key=self.api_key) + + confidence_score = logit_analysis["confidence_score"] + ryg = logit_analysis["ryg_string"] + choices = logit_analysis["choices"] + + # will display and add to journal only the 'first' token choice + # choices for each token captured in 'logit_analysis' metadata + if len(choices) > 1: + choices = choices[0] + + marker_tokens = logit_analysis["marker_tokens"] + output_response.update({"logit_analysis": logit_analysis}) + + # start journaling update + journal_update = f"analyzing response - {tool_type}\n" + journal_update += f"\t\t\t\t -- confidence score - {str(confidence_score)}\n" + journal_update += f"\t\t\t\t -- analyzing response - {ryg}\n" + journal_update += f"\t\t\t\t -- analyzing response - {choices}" + if marker_tokens: + journal_update += "\n" + journal_update += f"\t\t\t\t -- analyzing response - {str(marker_tokens)}" + + self.write_to_journal(journal_update) + + value_output.update({"confidence_score": confidence_score}) + if marker_tokens: + value_output.update({"choices": marker_tokens}) + + # assemble output response dictionary + + output_response = {"step": self.step, "tool": tool_type, "inference": self.inference_calls, + "llm_response": llm_response} + + if 
get_logits: + output_response.update({"confidence_score": confidence_score}) + + output_response.update({"llm_usage": usage, "work_iteration": work_iter, "dict_output": dict_output}) + + for keys, values in work_dict.items(): + output_response.update({keys:values}) + + if get_logits: + output_response.update({"logit_analysis": logit_analysis}) + + # save to response list state tracker + self.response_list.append(output_response) + + else: + raise ModelNotFoundException(tool_type) + + # print("update: output_response - ", output_response) + + # replacing output_response with value_output which is a dictionary "subset" of the full output response + + return value_output + + def exec_multitool_function_call(self, tool_type_list, text=None, function="classify", params=None, + get_logits=True): + + """ Executes multiple function calls on the same text with a list of tools in tool_type_list """ + + output_list = [] + + for tool_type in tool_type_list: + + response = self.exec_function_call(tool_type,text=text,get_logits=get_logits, + params=params, function=function) + + output_list.append(response) + + return output_list + + def sentiment(self, text=None, params=None): + + """ Executes sentiment analysis on text, if passed directly, or will pull current work item from the + queue. Returns value output dictionary with sentiment classification, confidence score and choices. """ + + if not params: + # default parameter key + params = ["sentiment"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("sentiment", text=text, params=params) + + def topics(self, text=None, params=None): + + """ Executes topics analysis on text, if passed directly, or will pull current work item from the queue. + Returns value output dictionary with topics classification and confidence score. """ + + if not params: + # default parameter key + params = ["topic"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("topics", text=text, params=params) + + def named_entity_extraction(self, text=None, params=None): + + """ Executes named entity classification analysis on a text, if passed directly, or will pull current + work item from the queue. Returns value output dictionary with named entity classification and + confidence score. """ + + if not params: + # default parameter key + params = ["people", "place", "company", "misc"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("ner", text=text, params=params) + + def ner(self, text=None, params=None): + + """ Executes named entity classification analysis on a text, if passed directly, or will pull current + work item from the queue. Returns value output dictionary with named entity classification and + confidence score. """ + + #TODO: identical to "named_entity_extraction" method - should remove one of them + + if not params: + # default parameter key + params = ["people", "place", "company", "misc"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("ner", text=text, params=params) + + def ratings(self, text=None, params=None): + + """ Executes ratings classification analysis on a text of 1-5, if passed directly, or will pull current + work item from the queue. Returns value output dictionary with rating classification and + confidence score. 
""" + + if not params: + # default parameter key + params = ["rating"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("ratings", text=text, params=params) + + def emotions(self, text=None, params=None): + + """ Executes emotions classification analysis on a text, if passed directly, or will pull current + work item from the queue. Returns value output dictionary with emotions classification and + confidence score. """ + + if not params: + # default parameter key + params = ["emotions"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("emotions", text=text, params=params) + + def intent(self, text=None, params=None): + + """ Executes intent classification analysis on a text, if passed directly, or will pull current + work item from the queue. Returns value output dictionary with intent classification and + confidence score. """ + + if not params: + # default parameter key + params = ["intent"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("intent", text=text, params=params) + + def tags(self, text=None, params=None): + + """ Generates a list of relevant 'tag' information data points from a text, if passed directly, or + will pull current work item from the queue. Returns value output dictionary with list of key + highlighted points. """ + + if not params: + # default parameter key + params = ["tags"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("tags", text=text, params=params) + + def category(self, text=None, params=None): + + """ Generates a list of relevant business category information data points from a text, if passed + directly, or will pull current work item from the queue. Returns value output dictionary with list of + business category classification (usually a single entry, but possible for multiple entries). """ + + if not params: + # default parameter key + params = ["category"] + + if isinstance(params, str): + params = [params] + + return self.exec_function_call("category", text=text, params=params) + + def nli(self, text1, text2, params=None): + + """ Executes a natural language inference classification on a text, if passed directly, or will pull current + work item from the queue. Returns value output dictionary with the NLI classification and + confidence score. """ + + if not params: + # default parameter key + params = ["evidence"] + + if isinstance(params, str): + params = [params] + + context = "Evidence: " + text1 + "\n" + "Conclusion: " + text2 + + return self.exec_function_call("nli", text=context, params=params) + + def verify_llm_response(self, input_context, llm_response): + + """ Utility function to apply NLI to compare llm_response with the input context. 
""" + + return self.nli(input_context, llm_response) + + def answer(self, question, context=None, key=None): + + """ Executes an inference """ + + model = getattr(self, "answer" + "_model") + + # insert change - load model in-line + # if model not yet loaded, then load in-line + if not model: + model = self.load_tool("answer") + # end - insert change + + inference = getattr(model, "inference") + + journal_update = f"executing function call - deploying - question-answer tool " + self.write_to_journal(journal_update) + + if context: + self.load_work(context) + + work_dict = self.work_queue[self.work_iteration] + text = work_dict["text"] + work_iter = self.work_iteration + + response = inference(question, add_context=text, add_prompt_engineering=True) + + llm_response = re.sub("[\n\r]", "\t", response["llm_response"]) + + if not key: + self.report[work_iter].update({"answer": [llm_response]}) + answer_key = "answer" + else: + self.report[work_iter].update({key:[llm_response]}) + answer_key = key + + usage = response["usage"] + + self.inference_calls += 1 + + # start journaling update + journal_update = f"executing function call - " \ + f"getting response - question - {answer_key}\n" + journal_update += f"\t\t\t\t -- llm_response - {str(llm_response)}\n" + journal_update += f"\t\t\t\t -- output type - text\n" + journal_update += f"\t\t\t\t -- usage - {usage}" + + self.write_to_journal(journal_update) + + # assemble output response dictionary + + output_response = {"step": self.step, "tool": "answer", "inference": self.inference_calls, + "llm_response": llm_response} + + get_logits=False + + if get_logits: + confidence_score =-1 + output_response.update({"confidence_score": confidence_score}) + + output_response.update({"llm_usage": usage, "work_iteration": work_iter, "dict_output": False}) + + for keys, values in work_dict.items(): + output_response.update({keys:values}) + + if get_logits: + logit_analysis= {} + output_response.update({"logit_analysis": logit_analysis}) + + # save to response list state tracker + self.response_list.append(output_response) + + return output_response + + def sql(self, query, table_schema): + + """ Executes Text2Sql tool to convert query into SQL """ + + model = getattr(self, "sql" + "_model") + + # insert change - load model in-line + # if model not yet loaded, then load in-line + if not model: + model = self.load_tool("sql") + # end - insert change + + inference = getattr(model, "inference") + + if table_schema: + self.load_work(table_schema) + self.top_of_work_queue() + + work_dict = self.work_queue[self.work_iteration] + table_schema = work_dict["text"] + work_iter = self.work_iteration + + # initial journal update + journal_update = f"executing function call - deploying - text-to-sql\n" + journal_update += f"\t\t\t\t -- query - {query}\n" + journal_update += f"\t\t\t\t -- table_schema - {table_schema}" + self.write_to_journal(journal_update) + + response = inference(query, add_context=table_schema, add_prompt_engineering=True) + + self.inference_calls += 1 + + llm_response = response["llm_response"] + + self.report[work_iter].update({"sql": [llm_response]}) + + usage = response["usage"] + + self.inference_calls += 1 + + # start journaling update + journal_update = f"executing function call - getting response - sql\n" + journal_update += f"\t\t\t\t -- llm_response - {str(llm_response)}\n" + journal_update += f"\t\t\t\t -- output type - text\n" + journal_update += f"\t\t\t\t -- usage - {usage}" + + self.write_to_journal(journal_update) + # end journaling + + # 
assemble output response dictionary + + output_response = {"step": self.step, "tool": "sql", "inference": self.inference_calls, + "llm_response": llm_response} + + # logits not yet activated for inference calls - TBD - set 'get_logits = False" for now + get_logits=False + if get_logits: + confidence_score =-1 + output_response.update({"confidence_score": confidence_score}) + + output_response.update({"llm_usage": usage, "work_iteration": work_iter, "dict_output": False}) + + for keys, values in work_dict.items(): + output_response.update({keys:values}) + + if get_logits: + logit_analysis= {} + output_response.update({"logit_analysis": logit_analysis}) + + # save to response list state tracker + self.response_list.append(output_response) + + return output_response + + def query_db(self, query, table=None, table_schema=None, db=None, db_name=None): + + """ Executes two steps - converts input query into SQL, and then executes the SQL query on the DB. """ + + sql_db = SQLTables(db=db, db_name=db_name) + + if not table_schema: + if table: + table_schema = sql_db.get_table_schema(table) + + # step 1 - convert question into sql + + if not table_schema: + logging.warning("update: LLMfx - query_db - could not identify table schema - can not proceed") + return -1 + + # run inference with query and table schema to get SQL query response + response = self.sql(query, table_schema) + + # step 2 - run query + sql_query = response["llm_response"] + sql_db_name = sql_db.db_file + + # initial journal update + journal_update = f"executing research call - executing query on db\n" + journal_update += f"\t\t\t\t -- db - {sql_db_name}\n" + journal_update += f"\t\t\t\t -- sql_query - {sql_query}" + self.write_to_journal(journal_update) + + db_output = sql_db.query_db(response["llm_response"]) + + output = [] + db_response = list(db_output) + + for rows in db_response: + output.append(rows) + + result = {"step": self.step, "tool": "sql", "db_response": output, "sql_query": response["llm_response"], + "query": query,"db": sql_db_name, "work_item": table_schema} + + self.research_list.append(result) + + # start journaling update + journal_update = f"executing research - getting response - sql\n" + journal_update += f"\t\t\t\t -- result - {str(output)}" + # journal_update += f"\t\t\t\t -- output type - text" + + self.write_to_journal(journal_update) + # end journaling + + return result + + def token_comparison (self, value_string, context): + + """ Utility function to perform token-level comparison in llm_response with input source materials. 
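Returns a comparison_stats dictionary with the keys 'percent_display', 'confirmed_words', 'unconfirmed_words' and 'verified_token_match_ratio'.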
""" + + # note: this is a more limited version of the QualityCheck tools used in Prompt class + + c = CorpTokenizer(remove_stop_words=True, remove_numbers=False, + one_letter_removal=True, remove_punctuation=False) + + llm_response_tokens = c.tokenize(value_string) + context_tokens = c.tokenize(context) + + # iterate thru each key point and analyze comparison match + matched = [] + unmatched = [] + + for i, tok in enumerate(llm_response_tokens): + + if tok.endswith("."): + tok = tok[:-1] + + if tok.endswith(";"): + tok = tok[:-1] + + tok = re.sub("[,();$\"\n\r\t\u2022\u201c\u201d]", "", tok) + + if len(tok) > 0: + + match_found = False + + for j, etoks in enumerate(context_tokens): + + if etoks.endswith("."): + etoks = etoks[:-1] + + if etoks.endswith(";"): + etoks = re.sub("[(),;$\n\r\t\"\u2022\u201c\u201d]", "", etoks) + + if tok == etoks: + # found matching token + match_found = True + matched.append(tok) + break + + if not match_found: + unmatched.append(tok) + + # match_percent = 0.0 + match_percent = "{0:.1f}%".format(0.0) + match_fr = 0.0 + + if (len(matched) + len(unmatched)) > 0: + + match_fr = len(matched) / (len(matched) + len(unmatched)) + + if match_fr > 1.0: + match_fr = 1.0 + + match_percent = "{0:.1f}%".format((match_fr * 100)) + + comparison_stats = {"percent_display": match_percent, + "confirmed_words": matched, + "unconfirmed_words": unmatched, + "verified_token_match_ratio": match_fr, + } + + return comparison_stats + + +class SQLTables: + + """ SQLTables is a class for creating and accessing external SQL data, primarily as a resource that is + accessible via Text2SQL programmatic inferences. + + This is an **experimental** feature, and currently supports only use of SQLite, configured as a separate + local file-based DB, e.g., sqlite-experimental.db + + Use of this class will create a separate sqlite_experimental.db per the configs in SQLiteConfig + """ + + def __init__(self, db=None, db_name=None, experimental=True): + + self.db = "sqlite" + + # default config for "db_experimental" = "sqlite_experimental.db" + self.db_name = SQLiteConfig().get_config("db_experimental") + + if experimental: + self.db_file = SQLiteConfig().get_uri_string_experimental_db() + logging.info("update: connecting to experimental sqlite db - %s", self.db_file) + + else: + self.db_file = SQLiteConfig().get_uri_string() + logging.info("warning: connecting to main sqlite db - %s", self.db_file) + + self.conn = sqlite3.connect(self.db_file) + + self.tables = [] + + def get_table_schema(self,table_name): + + """ Lookup of table_schema for an input table_name - outputs 'create table schema string' that can + be used directly as context in a text2sql inference """ + + table_schema = "" + + sql_query = f"SELECT * FROM sqlite_master WHERE type = 'table' AND name = '{table_name}';" + + table_schema_row = self.conn.cursor().execute(sql_query) + table_schema_row = list(table_schema_row) + + if len(table_schema_row) > 0: + table_schema = table_schema_row[0][4] + + return table_schema + + def get_column_names(self, table_name): + + """ Gets the column names from a table, and provides a list as output. 
""" + + column_names = [] + + sql_query_pragma = "PRAGMA table_info('{}')".format(table_name) + column_info = self.conn.cursor().execute(sql_query_pragma) + + for entries in column_info: + # print("pragma - columns info output - ", entries) + column_names.append(entries[1]) + + return column_names + + def query_db(self, sql_query): + + """ Executes a query directly on database """ + + # note: security and access are left to the user to manage + + try: + result = self.conn.cursor().execute(sql_query) + except: + logging.warning("update: query generated error - not successful - %s", sql_query) + + # if sql query generates error, then an empty result is returned + result = [] + + return result + + def delete_experimental_db(self, confirm_delete=False): + + """ Deletes the experimental db """ + + # delete db and start fresh + if confirm_delete: + shutil.rmtree(self.db_file) + logging.warning("update: deleted sqlite experimental db - %s ", self.db_file) + + return True + + def delete_table(self, table_name, confirm_delete=False): + + """ Deletes a table on the experimental db """ + + if confirm_delete: + + sql_instruction = f"DROP TABLE {table_name};" + results = self.conn.cursor().execute(sql_instruction) + self.conn.commit() + logging.warning("update: delete sqlite experimental db - table - %s ", table_name) + + return 0 + + def register_table(self, sql_table_create): + self.tables.append(sql_table_create) + return self.tables + + def reset_tables(self): + self.tables = [] + return True + + def table_exists_check(self, table_name): + + """Checks if table exists - true if exists, false if does not exist. """ + + sql_query = f"SELECT * FROM sqlite_master WHERE type = 'table' AND name = '{table_name}';" + + results = self.conn.cursor().execute(sql_query) + + if len(list(results)) > 0: + table_exists = True + else: + table_exists = False + + return table_exists + + def load_csv(self, fp, fn): + + """ Opens CSV file at folder_path fp and file_name fn and returns array-like output in memory """ + + in_path = os.path.join(fp,fn) + + # csv encoding can vary - utf-8-sig and errors='ignore' seems to be the most resilient for wide range of csv + record_file = open(in_path, encoding='utf-8-sig',errors='ignore') + c = csv.reader(record_file, dialect='excel', doublequote=False, delimiter=',') + output = [] + for lines in c: + output.append(lines) + record_file.close() + + return output + + def create_new_table(self, output, table_name): + + """ Creates a new table, deriving the column names from an implied header row in the output, + and a sniff test on the value types. """ + + col_names = [] + + if len(output) > 1: + header_row = output[0] + test_row = output[1] + + keys_list = "(" + + sql_create_table = f"CREATE TABLE {table_name} (" + for i, entry in enumerate(header_row): + col_name = re.sub("[\xfe\xff]","",entry) + try: + test_int = int(test_row[i]) + type="integer" + except: + type="text" + + col_names.append(col_name) + + keys_list += col_name + ", " + + sql_create_table += col_name + " " + type + ", " + + if sql_create_table.endswith(", "): + sql_create_table = sql_create_table[:-2] + + sql_create_table += " )" + + if keys_list.endswith(", "): + keys_list = keys_list[:-2] + + keys_list += " )" + + self.conn.cursor().execute(sql_create_table) + + return col_names + + def insert_new_row(self, table_name, keys_list, new_row): + + """ Inserts a new row into table. 
""" + + col_names = "(" + for cols in keys_list: + col_names += cols + ", " + if col_names.endswith(", "): + col_names = col_names[:-2] + col_names += ")" + + values_list = "(" + for j in range(0, len(new_row)): + values_list += "$" + str(j + 1) + ", " + + if values_list.endswith(", "): + values_list = values_list[:-2] + + values_list += ")" + + new_record = f"INSERT INTO {table_name} {col_names} VALUES {values_list};" + + logging.info("update: inserting new_record - %s ", new_record) + + self.conn.cursor().execute(new_record, new_row) + + return True + + def create_new_table_from_csv(self,fp=None, fn=None, table_name=None): + + """ Designed for rapid prototyping - input is a well-formed csv file with assumed header row with + each entry representing a column name, and well-formed rows. """ + + # load csv + output = self.load_csv(fp,fn) + + # check if table exists + if not self.table_exists_check(table_name): + + logging.info("update: table does not exist - so creating") + # need to build the table + column_names = self.create_new_table(output, table_name) + logging.info("update: table created - column names - %s ", column_names) + + else: + print("update: table exists - getting column names") + column_names = self.get_column_names(table_name) + + # insert records + + new_record = "" + for i in range(1, len(output)): + + # print("update: inserting new record - ", i, output[i]) + + self.insert_new_row(table_name,column_names,output[i]) + + self.conn.commit() + self.conn.close() + + logging.info("update: done inserting records into new table") + + return 0 + + + + + diff --git a/llmware/configs.py b/llmware/configs.py index 9c224171..761eca64 100644 --- a/llmware/configs.py +++ b/llmware/configs.py @@ -1,5 +1,3 @@ - - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -13,7 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The configs module implements the configuration logic using classes for llmware. +The implementation includes the central llmware config class LLMWareConfig, and the config classes for all +supported text index databases and vector databases. 
+""" import os import platform @@ -43,7 +45,7 @@ class LLMWareConfig: "tmp_path_name": "tmp" + os.sep} # note: two alias for postgres vector db - "postgres" and "pg_vector" are the same - _supported = {"vector_db": ["neo4j", "milvus", "pg_vector", "postgres", "redis", "pinecone", "faiss", "qdrant", "mongo_atlas","lancedb"], + _supported = {"vector_db": ["chromadb", "neo4j", "milvus", "pg_vector", "postgres", "redis", "pinecone", "faiss", "qdrant", "mongo_atlas","lancedb"], "collection_db": ["mongo", "postgres", "sqlite"], "table_db": ["postgres", "sqlite"]} @@ -55,13 +57,6 @@ class LLMWareConfig: "llmware_public_models_bucket": "llmware-public-models", "shared_lib_path": os.path.join(os.path.dirname(os.path.realpath(__file__)), "lib") - # "collection_db_uri": os.environ.get("COLLECTION_DB_URI", "mongodb://localhost:27017/"), - # "collection_db_username": "", # Not used for now - # "collection_db_password": "", # Not used for now - # "milvus_host": os.environ.get("MILVUS_HOST","localhost"), - # "milvus_port": int(os.environ.get("MILVUS_PORT",19530)), - # "milvus_db": os.environ.get("MILVUS_DB", "default"), - } @classmethod @@ -518,7 +513,9 @@ class SQLiteConfig: "sqlite_db_folder_path": LLMWareConfig().get_library_path(), "user_name": "", "pw": "", - "db_name": "sqlite_llmware.db"} + "db_name": "sqlite_llmware.db", + # add new parameter for SQLTables + "db_experimental": "sqlite_experimental.db"} @classmethod def get_config(cls, name): @@ -536,6 +533,14 @@ def get_uri_string (cls): db_file = os.path.join(cls._conf["sqlite_db_folder_path"], cls._conf["db_name"]) return db_file + # new method for SQLTables DB + @classmethod + def get_uri_string_experimental_db(cls): + """For SQLite the URI string is the local file with full absolute path""" + db_file = os.path.join(cls._conf["sqlite_db_folder_path"], cls._conf["db_experimental"]) + return db_file + # end method + @classmethod def get_db_configs(cls): configs = {} @@ -738,3 +743,120 @@ def get_db_pw(cls): @classmethod def get_database_name(cls): return cls._conf["database"] + + +class ChromaDBConfig: + """Configuration object for chroma. + + The default is to use chroma as an in-memory (ephemeral) store. + + Chroma can be used with or without (default) a client/server architecture. If it is used with a client/server + architecture, you have to set the authentication meachanism. The authentication mechanism can be either + username/password or token. + - env variable CHROMA_HOST is None -> not client/server mode (default), + - env variable CHROMA_HOST is set -> client/server mode + + If you want to use Chroma without the client/server architecture, the env variable CHROMA_HOST has to be + None (default). In this mode, you can choos between in-memory (also called ephemeral, non-persistent) and + persistent. + - env variable CHROMA_PERSISTENT_PATH is None -> in-memory (non-persistent), + - env variable CHROMA_PERSISTENT_PATH is set -> persistent storage. + + If you want to use Chroma in client/server mode, the env variable CHROMA_HOST needs to be set. In addition, + you have to set + - env variable CHROMA_SERVER_AUTH_PROVIDER, and + - env variable CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER + the value depends on the authentication mechanism you want to use. + + For more information, please visit https://docs.trychroma.com/getting-started + """ + + _conf = { + 'collection': os.environ.get('CHROMA_COLLECTION', 'llmware'), + + # + # Persistent path to make chroma persistent. + # If this is None, then an in-memory only chroma instance will be created. 
+ # + 'persistent_path': os.environ.get('CHROMA_PERSISTENT_PATH', None), + + # + # Configs below are only relevant when chromadb is run in client/server mode. + # + 'host': os.environ.get('CHROMA_HOST', None), + 'port': os.environ.get('CHROMA_PORT', 8000), + 'ssl': os.environ.get('CHROMA_SSL', False), + 'headers': os.environ.get('CHROMA_HEADERS', {}), + + # The provider decides whether we use authentication via username and password, or via a token. + # - For the username and password, this has to be set to chromadb.auth.basic.BasicAuthServerProvider + # - For the token, this has to be set to chromadb.auth.token.TokenAuthServerProvider + 'auth_provider': os.environ.get('CHROMA_SERVER_AUTH_PROVIDER', None), + + # The credential provider supplies the username and password or the token. This setting hence + # depends on the variable just above. + # - For the username and password, this has to be set to chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider + # - For the token, this has to be set to chromadb.auth.token.TokenAuthServerProvider + 'auth_credentials_provider': os.environ.get('CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER', None), + + # Settings for authentication via username and password. + 'user': os.environ.get('CHROMA_USERNAME', 'admin'), + 'password': os.environ.get('CHROMA_PASSWORD', 'admin'), + 'auth_credentials_file': os.environ.get('CHROMA_SERVER_AUTH_CREDENTIALS_FILE', 'server.htpasswd'), + + # Settings for authentication via token. + 'auth_credentials': os.environ.get('CHROMA_SERVER_AUTH_CREDENTIALS', None), + 'auth_token_transport_header': os.environ.get('CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER', None), + } + + @classmethod + def get_db_configs(cls): + configs = {} + for keys, values in cls._conf.items(): + configs.update({keys:values}) + return configs + + @classmethod + def get_config(cls, name): + if name in cls._conf: + return cls._conf[name] + raise ConfigKeyException(name) + + @classmethod + def set_config(cls, name, value): + cls._conf[name] = value + + @classmethod + def get_uri_string(cls): + return cls._conf["uri"] + + @classmethod + def get_user_name(cls): + return cls._conf["user"] + + @classmethod + def get_db_pw(cls): + return cls._conf["password"] + + @classmethod + def get_collection_name(cls): + return cls._conf["collection"] + @classmethod + def get_auth_provider(cls): + return cls._conf["auth_provider"] + + @classmethod + def get_auth_credentials_provider(cls): + return cls._conf["auth_credentials_provider"] + + @classmethod + def get_auth_credentials_file(cls): + return cls._conf["auth_credentials_file"] + + @classmethod + def get_auth_credentials(cls): + return cls._conf["auth_credentials"] + + @classmethod + def get_auth_token_transport_header(cls): + return cls._conf["auth_token_transport_header"] diff --git a/llmware/embeddings.py b/llmware/embeddings.py index e411ac16..deeeb6cb 100644 --- a/llmware/embeddings.py +++ b/llmware/embeddings.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,6 +11,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The embeddings module implements the supported vector databases. + +The common abstraction for all supported vector databases is the EmbeddingHandler class, which supports +creating a new embedding, as well as searching and deleting the vector index. 
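
Since every ChromaDBConfig setting above is sourced from an environment variable with a sensible default, switching between the documented modes is mostly a matter of configuration. A short sketch using the accessors defined above (the path and collection name are placeholders); note that the `CHROMA_*` variables are read when `llmware.configs` is imported, so they should be exported before the import, while `set_config` can be used afterwards:

```python
from llmware.configs import ChromaDBConfig

# default (no CHROMA_HOST, no CHROMA_PERSISTENT_PATH): in-memory, ephemeral store

# switch to a local persistent store and a custom collection name at runtime
ChromaDBConfig().set_config("persistent_path", "/path/to/chroma_store")   # placeholder path
ChromaDBConfig().set_config("collection", "my_llmware_collection")

# client/server mode would instead set "host" (plus "port", "ssl" and the auth settings
# described in the docstring), e.g. ChromaDBConfig().set_config("host", "localhost")

print(ChromaDBConfig().get_db_configs())
```
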
The module also implements the +_EmbeddingUtils class, which provides a set of of funtions used by all vector database classes. +""" import os @@ -68,9 +73,15 @@ except: pass +# optional import of chromadb - not in project requirements +try: + import chromadb +except: + pass + from llmware.configs import LLMWareConfig, MongoConfig, MilvusConfig, PostgresConfig, RedisConfig, \ - PineconeConfig, QdrantConfig, Neo4jConfig, LanceDBConfig + PineconeConfig, QdrantConfig, Neo4jConfig, LanceDBConfig, ChromaDBConfig from llmware.exceptions import (UnsupportedEmbeddingDatabaseException, EmbeddingModelNotFoundException, DependencyNotInstalledException) from llmware.resources import CollectionRetrieval, CollectionWriter @@ -185,6 +196,10 @@ def _load_embedding_db(self, embedding_db, model=None, model_name=None, embeddin return EmbeddingNeo4j(self.library, model=model, model_name=model_name, embedding_dims=embedding_dims) + if embedding_db == "chromadb": + return EmbeddingChromaDB(self.library, model=model, model_name=model_name, + embedding_dims=embedding_dims) + def generate_index_name(self, account_name, library_name, model_name, max_component_length=19): """ Creates a unique name for the vector index that concats library_name + model_name + account_name """ @@ -2060,3 +2075,148 @@ def _query(self, query, parameters=None): return [d.data() for d in data] except CypherSyntaxError as e: raise ValueError(f'Cypher Statement is not valid\n{e}') + + +class EmbeddingChromaDB: + + def __init__(self, library, model=None, model_name=None, embedding_dims=None): + # + # General llmware set up code + # + + # look up model card + if not model and not model_name: + raise EmbeddingModelNotFoundException("no-model-or-model-name-provided") + + + self.library = library + self.library_name = library.library_name + self.model = model + self.model_name = model_name + self.embedding_dims = embedding_dims + self.account_name = library.account_name + + # if model passed (not None), then use model name + if self.model: + self.model_name = self.model.model_name + self.embedding_dims = model.embedding_dims + + + # + # ChromaDB instantiation + # + + # Get environment variables to decide which client to use. + persistent_path = ChromaDBConfig.get_config('persistent_path') + host = ChromaDBConfig.get_config('host') + + # Instantiate client. + if host is None and persistent_path is None: + self.client = chromadb.EphemeralClient() + + if persistent_path is not None: + self.client = chromadb.PersistentClient(path=persistent_path) + + if host is not None: + self.client = chromadb.HttpClient(host=host, + port=ChromaDBConfig.get_config('port'), + ssl=ChromaDBConfig.get_config('ssl'), + headers=ChromaDBConfig.get_config('headers')) + + collection_name = ChromaDBConfig.get_config('collection') + # If the collection already exists, it is returned. 
+ self._collection = self.client.create_collection(name=collection_name, get_or_create=True) + + + # + # Embedding utils + # + self.utils = _EmbeddingUtils(library_name=self.library_name, + model_name=self.model_name, + account_name=self.account_name, + db_name="chromadb", + embedding_dims=self.embedding_dims) + + def create_new_embedding(self, doc_ids=None, batch_size=500): + + all_blocks_cursor, num_of_blocks = self.utils.get_blocks_cursor(doc_ids=doc_ids) + + # Initialize a new status + status = Status(self.library.account_name) + status.new_embedding_status(self.library.library_name, self.model_name, num_of_blocks) + + embeddings_created = 0 + current_index = 0 + finished = False + + # all_blocks_iter = all_blocks_cursor.pull_one() + + while not finished: + block_ids, doc_ids, sentences = [], [], [] + + # Build the next batch + for i in range(batch_size): + block = all_blocks_cursor.pull_one() + if not block: + finished = True + break + + text_search = block["text_search"].strip() + if not text_search or len(text_search) < 1: + continue + + block_ids.append(str(block["_id"])) + doc_ids.append(int(block["doc_ID"])) + sentences.append(text_search) + + if len(sentences) > 0: + # Process the batch + vectors = self.model.embedding(sentences) + + # Insert into ChromaDB + ids = [f'{doc_id}-{block_id}' for doc_id, block_id in zip(doc_ids, block_ids)] + metadatas = [{'doc_id': doc_id, 'block_id': block_id, 'sentence': sentence} + for doc_id, block_id, sentence in zip(doc_ids, block_ids, sentences)] + + self._collection.add(ids=ids, + documents=doc_ids, + embeddings=vectors, + metadatas=metadatas) + + + current_index = self.utils.update_text_index(block_ids, current_index) + + # Update statistics + embeddings_created += len(sentences) + status.increment_embedding_status(self.library.library_name, self.model_name, len(sentences)) + + print(f"update: embedding_handler - ChromaDB - Embeddings Created: {embeddings_created} of {num_of_blocks}") + + + embedding_summary = self.utils.generate_embedding_summary(embeddings_created) + logging.info(f'update: EmbeddingHandler - ChromaDB - embedding_summary - {embedding_summary}') + + return embedding_summary + + def search_index(self, query_embedding_vector, sample_count=10): + + block_list = [] + + # add one dimension because chroma expects two dimensions - a list of lists + query_embedding_vector = query_embedding_vector.reshape(1, -1) + + results = self._collection.query(query_embeddings=query_embedding_vector, n_results=sample_count) + + for idx_result, _ in enumerate(results['ids'][0]): + block_id = results['metadatas'][0][idx_result]['block_id'] + block_result_list = self.utils.lookup_text_index(block_id) + + for block in block_result_list: + block_list.append((block, results['distances'][0][idx_result])) + + return block_list + + def delete_index(self): + + self.client.delete_collection(self._collection.name) + self.utils.unset_text_index() diff --git a/llmware/library.py b/llmware/library.py index b8beb55f..e1b05426 100644 --- a/llmware/library.py +++ b/llmware/library.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,7 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The library module implements the logic for managing unstructured information (the text). +The module implements the two classes Library and LibraryCatalog. 
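
With the ChromaDB handler registered in EmbeddingHandler, a chroma-backed index plugs into the existing library workflow. A hedged end-to-end sketch: the folder path and library name are placeholders, `chromadb` must be installed separately (it is an optional import above), and retrieval goes through the existing `Query.semantic_query` path, which is not part of this diff:

```python
from llmware.library import Library
from llmware.retrieval import Query

lib = Library().create_new_library("chroma_test_lib")
lib.add_files("/path/to/docs")   # placeholder folder of documents to parse

# "chromadb" is now a supported vector_db option and routes to EmbeddingChromaDB above
lib.install_new_embedding(embedding_model_name="all-MiniLM-L6-v2", vector_db="chromadb",
                          batch_size=500)

# run a semantic query against the new chroma index
results = Query(lib).semantic_query("termination provisions", result_count=10)
for r in results:
    print("result: ", r["text"])
```
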
Library is responsible for organising a +collection of text and is the interface for the Parser and Embedding classes. In addition, the Library object +is passed to the Query and Prompt objects. The Library class uses the LibraryCatalog for creating, deleting, +updating, and other tasks pertaining to Libraries via the Library Card. +""" from werkzeug.utils import secure_filename import shutil @@ -540,7 +545,7 @@ def generate_knowledge_graph(self): def install_new_embedding (self, embedding_model_name=None, vector_db=None, from_hf= False, from_sentence_transformer=False, model=None, tokenizer=None, model_api_key=None, - vector_db_api_key=None, batch_size=500): + vector_db_api_key=None, batch_size=500, max_len=None, use_gpu=True): """ Main method for installing a new embedding on a library """ @@ -579,6 +584,9 @@ def install_new_embedding (self, embedding_model_name=None, vector_db=None, if vector_db not in LLMWareConfig().get_supported_vector_db(): raise UnsupportedEmbeddingDatabaseException(vector_db) + if my_model and max_len: + my_model.max_len = max_len + # step 2 - pass loaded embedding model to EmbeddingHandler, which will route to the appropriate resource embeddings = EmbeddingHandler(self).create_new_embedding(vector_db, my_model, batch_size=batch_size) diff --git a/llmware/model_configs.py b/llmware/model_configs.py index 65643327..df61bc91 100644 --- a/llmware/model_configs.py +++ b/llmware/model_configs.py @@ -1,37 +1,154 @@ +"""Global Default Configs for Models, Finetune Wrappers and Prompt Instructions Catalog. -""" Global Default Configs for Models, Finetune Wrappers and Prompt Instructions Catalog. These configs generally - do not need to be accessed directly, but can be viewed and updated through ModelCatalog and PromptCatalog classes.""" +These configs generally do not need to be accessed directly, but can be viewed and updated through +ModelCatalog and PromptCatalog classes. 
+""" global_model_repo_catalog_list = [ + # embedding models - direct pull from llmware repo - deprecated for HF pull + {"model_name": 'mini-lm-sbert-llmware', "display_name": "mini-lm-sbert-llmware", "model_family": "LLMWareSemanticModel", + "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 384, "context_window": 512, + "link": "", "custom_model_files": [], "custom_model_repo": ""}, + + {"model_name": 'industry-bert-insurance-llmware', "display_name": "industry-bert-insurance-llmware", + "model_family": "LLMWareSemanticModel", + "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 768, "context_window": 512, + "link": "https://huggingface.co/llmware/industry-bert-insurance-v0.1", "custom_model_files": [], + "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-insurance-v0.1"}, + + {"model_name": 'industry-bert-contracts-llmware', "display_name": "industry-bert-contracts-llmware", + "model_family": "LLMWareSemanticModel", + "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 768, "context_window": 512, + "link": "https://huggingface.co/llmware/industry-bert-contracts-v0.1", "custom_model_files": [], + "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-contracts-v0.1"}, + + {"model_name": 'industry-bert-asset-management-llmware', "display_name": "industry-bert-asset-management-llmware", + "model_family": "LLMWareSemanticModel", "model_category": "embedding", "model_location": "llmware_repo", + "embedding_dims": 768, "context_window": 512, + "link": "https://huggingface.co/llmware/industry-bert-asset-management-v0.1", "custom_model_files": [], + "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-asset-management-v0.1"}, + + {"model_name": 'industry-bert-sec-llmware', "display_name": "industry-bert-sec-llmware", "model_family": "LLMWareSemanticModel", + "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 768, "context_window": 512, + "link": "https://huggingface.co/llmware/industry-bert-sec-v0.1", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-sec-v0.1"}, + # embedding models - {"model_name": 'mini-lm-sbert', "display_name": "Sentence_Transformers (MPNet-Base)", "model_family": "LLMWareSemanticModel", - "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 384, "context_window":512, - "link": "","custom_model_files": [], "custom_model_repo": ""}, - - {"model_name": 'industry-bert-insurance', "display_name": "Insurance_LLMWare_Accelerator", "model_family": "LLMWareSemanticModel", - "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 768, "context_window":512, - "link": "https://huggingface.co/llmware/industry-bert-insurance-v0.1", "custom_model_files":[], - "custom_model_repo": ""}, - - {"model_name": 'industry-bert-contracts', "display_name": "Contracts_LLMWare_Accelerator", "model_family": "LLMWareSemanticModel", - "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 768, "context_window":512, - "link": "https://huggingface.co/llmware/industry-bert-contracts-v0.1", "custom_model_files":[], - "custom_model_repo": ""}, - - {"model_name": 'industry-bert-asset-management', "display_name": "Asset_Management_LLMWare_Accelerator", - "model_family": "LLMWareSemanticModel", "model_category": "embedding", "model_location": "llmware_repo", - "embedding_dims": 768, "context_window":512, - "link": 
"https://huggingface.co/llmware/industry-bert-asset-management-v0.1", "custom_model_files":[], - "custom_model_repo": ""}, - - {"model_name": 'industry-bert-sec', "display_name": "SEC_LLMWare_Accelerator", "model_family": "LLMWareSemanticModel", - "model_category": "embedding", "model_location": "llmware_repo", "embedding_dims": 768, "context_window":512, - "link": "https://huggingface.co/llmware/industry-bert-sec-v0.1", "custom_model_files": [], "custom_model_repo": ""}, + + {"model_name": "all-MiniLM-L6-v2", "display_name": "mini-lm-sbert", "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 384, "context_window": 512, + "link": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2", + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "sentence-transformers/all-MiniLM-L6-v2"}, + + {"model_name": 'all-mpnet-base-v2', "display_name": "mpnet-base", "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window": 514, + "link": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2", + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "sentence-transformers/all-mpnet-base-v2"}, + + {"model_name": 'industry-bert-insurance', "display_name": "industry-bert-insurance", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window":512, + "link": "https://huggingface.co/llmware/industry-bert-insurance-v0.1", "custom_model_files":[], + "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-insurance-v0.1"}, + + {"model_name": 'industry-bert-contracts', "display_name": "industry-bert-contracts", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window":512, + "link": "https://huggingface.co/llmware/industry-bert-contracts-v0.1", "custom_model_files":[], + "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-contracts-v0.1"}, + + {"model_name": 'industry-bert-asset-management', "display_name": "industry-bert-asset-management", + "model_family": "HFEmbeddingModel", "model_category": "embedding", "model_location": "hf_repo", + "embedding_dims": 768, "context_window":512, + "link": "https://huggingface.co/llmware/industry-bert-asset-management-v0.1", "custom_model_files":[], + "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-asset-management-v0.1"}, + + {"model_name": 'industry-bert-sec', "display_name": "industry-bert-sec", "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window":512, + "link": "https://huggingface.co/llmware/industry-bert-sec-v0.1", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/industry-bert-sec-v0.1"}, + + {"model_name": 'nomic-ai/nomic-embed-text-v1', "display_name": "nomic-text-v1", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window": 8192, + "link": "https://huggingface.co/nomic-ai/nomic-embed-text-v1", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "nomic-ai/nomic-embed-text-v1"}, + + {"model_name": 'jinaai/jina-embeddings-v2-base-en', "display_name": "jina-base-en-v2", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window": 
8192, + "link": "https://huggingface.co/jinaai/jina-embeddings-v2-base-en", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "jinaai/jina-embeddings-v2-base-en"}, + + {"model_name": 'jinaai/jina-embeddings-v2-small-en', "display_name": "jina-small-en-v2", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 512, "context_window": 8192, + "link": "https://huggingface.co/jinaai/jina-embeddings-v2-small-en", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "jinaai/jina-embeddings-v2-small-en"}, + + {"model_name": 'BAAI/bge-small-en-v1.5', "display_name": "bge-small-en-v1.5", "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 384, "context_window": 512, + "link": "https://huggingface.co/BAAI/bge-small-en-v1.5", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "BAAI/bge-small-en-v1.5"}, + + {"model_name": 'BAAI/bge-large-en-v1.5', "display_name": "bge-large-en-v1.5", "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 1024, "context_window": 512, + "link": "https://huggingface.co/BAAI/bge-large-en-v1.5", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "BAAI/bge-large-en-v1.5"}, + + {"model_name": 'BAAI/bge-base-en-v1.5', "display_name": "bge-base-en-v1.5", "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window": 512, + "link": "https://huggingface.co/BAAI/bge-base-en-v1.5", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "BAAI/bge-base-en-v1.5"}, + + {"model_name": "thenlper/gte-small", "display_name": "gte-small", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 384, "context_window": 512, + "link": "https://huggingface.co/thenlper/gte-small", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "thenlper/gte-small"}, + + {"model_name": "thenlper/gte-base", "display_name": "gte-base", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 768, "context_window": 512, + "link": "https://huggingface.co/thenlper/gte-base", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "thenlper/gte-base"}, + + {"model_name": "thenlper/gte-large", "display_name": "gte-large", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 1024, "context_window": 512, + "link": "https://huggingface.co/thenlper/gte-large", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "thenlper/gte-large"}, + + {"model_name": 'llmrails/ember-v1', "display_name": "ember-v1", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 1024, "context_window": 512, + "link": "https://huggingface.co/llmrails/ember-v1", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmrails/ember-v1"}, + + {"model_name": "WhereIsAI/UAE-Large-V1", "display_name": "uae-large-v1", + "model_family": "HFEmbeddingModel", + "model_category": "embedding", "model_location": "hf_repo", "embedding_dims": 1024, "context_window": 512, + "link": "https://huggingface.co/WhereIsAI/UAE-Large-V1", "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "WhereIsAI/UAE-Large-V1"}, # add open ai embeddings 
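
The embedding entries above move from llmware-repo pulls to direct HuggingFace pulls and add several new families (mpnet, nomic, jina, bge, gte, ember, UAE); the OpenAI entries that follow are likewise refreshed. A hedged sketch of picking one up, including the optional `max_len` parameter added to `install_new_embedding` earlier in this diff, which is most useful for capping input length on the long-context jina and nomic entries (library name and folder are placeholders):

```python
from llmware.library import Library
from llmware.models import ModelCatalog

# the new catalog entries can be inspected directly
card = ModelCatalog().lookup_model_card("jinaai/jina-embeddings-v2-small-en")
print("embedding dims: ", card["embedding_dims"], " context window: ", card["context_window"])

lib = Library().create_new_library("embedding_test_lib")
lib.add_files("/path/to/docs")   # placeholder folder

# cap the embedding input length explicitly with the new max_len parameter
lib.install_new_embedding(embedding_model_name="jinaai/jina-embeddings-v2-small-en",
                          vector_db="faiss", max_len=2048)
```
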
{"model_name": 'text-embedding-ada-002', "display_name": "OpenAI-Embedding", "model_family": "OpenAIEmbeddingModel", - "model_category": "embedding", "model_location": "api", "context_window": 2048, "embedding_dims": 1536}, + "model_category": "embedding", "model_location": "api", "context_window": 8191, "embedding_dims": 1536}, + + {"model_name": 'text-embedding-3-small', "display_name": "OpenAI-Embedding", "model_family": "OpenAIEmbeddingModel", + "model_category": "embedding", "model_location": "api", "context_window": 8191, "embedding_dims": 1536}, + + {"model_name": 'text-embedding-3-large', "display_name": "OpenAI-Embedding", "model_family": "OpenAIEmbeddingModel", + "model_category": "embedding", "model_location": "api", "context_window": 8191, "embedding_dims": 3072}, # add cohere embeddings {"model_name": 'medium', "display_name": "Cohere-Medium-Embedding", "model_family": "CohereEmbeddingModel", @@ -104,7 +221,7 @@ {"model_name": "gpt-3.5-turbo", "display_name": "ChatGPT", "model_family": "OpenAIGenModel", "model_category": "generative-api","model_location": "api", "context_window": 4000}, - # gpt-4 add + # gpt-4 {"model_name": "gpt-4", "display_name": "GPT-4", "model_family": "OpenAIGenModel", "model_category": "generative-api", "model_location": "api", "context_window": 8000}, @@ -113,10 +230,10 @@ "model_category": "generative-api", "model_location": "api", "context_window": 4000}, # new gpt-4 models announced in November 2023 - {"model_name": "gpt-4-1106-preview", "display_name": "GPT-4-Turbo", "model_family": "OpenAIGenModel", + {"model_name": "gpt-4-1106-preview", "display_name": "GPT-4-Turbo-1106", "model_family": "OpenAIGenModel", "model_category": "generative-api", "model_location": "api", "context_window": 128000}, - {"model_name": "gpt-3.5-turbo-1106", "display_name": "GPT-3.5-Turbo", "model_family": "OpenAIGenModel", + {"model_name": "gpt-3.5-turbo-1106", "display_name": "GPT-3.5-Turbo-1106", "model_family": "OpenAIGenModel", "model_category": "generative-api", "model_location": "api", "context_window": 16385}, # end - gpt-4 model update @@ -137,113 +254,132 @@ "model_category": "generative-api", "model_location": "api", "context_window": 2048}, # core llmware bling open source models available in catalog directly - {"model_name": "llmware/bling-1.4b-0.1", "display_name": "Bling-Pythia-1.4B", "model_family": "HFGenerativeModel", + {"model_name": "llmware/bling-1.4b-0.1", "display_name": "bling-1.4b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space":"", "link": "https://huggingface.co/llmware/bling-1.4b-0.1", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-1.4b-0.1"}, - {"model_name": "llmware/bling-1b-0.1", "display_name": "Bling-Pythia-1.0B", "model_family": "HFGenerativeModel", + {"model_name": "llmware/bling-1b-0.1", "display_name": "bling-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-1b-0.1", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-1b-0.1"}, - {"model_name": 
"llmware/bling-falcon-1b-0.1", "display_name": "Bling-Falcon-1.3B", "model_family": "HFGenerativeModel", + {"model_name": "llmware/bling-falcon-1b-0.1", "display_name": "bling-falcon-1.3b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-falcon-1b-0.1", - "custom_model_files": [], "custom_model_repo": "" + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-falcon-1b-0.1" }, - {"model_name": "llmware/bling-sheared-llama-1.3b-0.1", "display_name": "Bling-Sheared-LLama-1.3B", + {"model_name": "llmware/bling-sheared-llama-1.3b-0.1", "display_name": "bling-sheared-llama-1.3b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-sheared-llama-1.3b-0.1", - "custom_model_files": [], "custom_model_repo": "" + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-sheared-llama-1.3b-0.1" }, - {"model_name": "llmware/bling-red-pajamas-3b-0.1", "display_name": "Bling-Pythia-1.4B", + {"model_name": "llmware/bling-red-pajamas-3b-0.1", "display_name": "bling-red-pajamas-3b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-red-pajamas-3b-0.1", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-red-pajamas-3b-0.1"}, - {"model_name": "llmware/bling-sheared-llama-2.7b-0.1", "display_name": "Bling-Sheared-Llama-2.7B", + {"model_name": "llmware/bling-sheared-llama-2.7b-0.1", "display_name": "bling-sheared-llama-2.7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-sheared-llama-2.7b-0.1", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-sheared-llama-2.7b-0.1"}, - {"model_name": "llmware/bling-stable-lm-3b-4e1t-v0", "display_name": "Bling-Stable-LM-3B", + {"model_name": "llmware/bling-stable-lm-3b-4e1t-v0", "display_name": "bling-stablelm-3b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-stable-lm-3b-4e1t-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-stable-lm-3b-4e1t-v0"}, - {"model_name": "llmware/bling-cerebras-1.3b-0.1", "display_name": "Bling-Cerebras-1.3B", + {"model_name": "llmware/bling-cerebras-1.3b-0.1", "display_name": "bling-cerebras-1.3b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", 
"context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-cerebras-1.3b-0.1", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-cerebras-1.3b-0.1"}, - {"model_name": "llmware/bling-tiny-llama-v0", "display_name": "Bling-Tiny-Llama-v0", + {"model_name": "llmware/bling-tiny-llama-v0", "display_name": "bling-tiny-llama-1b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/bling-tiny-llama-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/bling-tiny-llama-v0"}, # create dragon models - {"model_name": "llmware/dragon-yi-6b-v0", "display_name": "Dragon-Yi-6B", + {"model_name": "llmware/dragon-yi-6b-v0", "display_name": "dragon-yi-6b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "\n", "link": "https://huggingface.co/llmware/dragon-yi-6b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-yi-6b-v0"}, - {"model_name": "llmware/dragon-stablelm-7b-v0", "display_name": "Dragon-StableLM-7B", + {"model_name": "llmware/dragon-stablelm-7b-v0", "display_name": "dragon-stablelm-7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-stablelm-7b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-stablelm-7b-v0"}, - {"model_name": "llmware/dragon-mistral-7b-v0", "display_name": "Dragon-Mistral-7B", + {"model_name": "llmware/dragon-mistral-7b-v0", "display_name": "dragon-mistral-7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-mistral-7b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-mistral-7b-v0"}, - {"model_name": "llmware/dragon-red-pajama-7b-v0", "display_name": "Dragon-Red-Pajama-7B", + {"model_name": "llmware/dragon-red-pajama-7b-v0", "display_name": "dragon-red-pajama-7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-red-pajama-7b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-red-pajama-7b-v0"}, - {"model_name": "llmware/dragon-deci-6b-v0", "display_name": "Dragon-Deci-6B", 
+ {"model_name": "llmware/dragon-deci-6b-v0", "display_name": "dragon-deci-6b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-deci-6b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-deci-6b-v0"}, - {"model_name": "llmware/dragon-falcon-7b-v0", "display_name": "Dragon-Falcon-7B", + {"model_name": "llmware/dragon-falcon-7b-v0", "display_name": "dragon-falcon-7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-falcon-7b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-falcon-7b-v0"}, - {"model_name": "llmware/dragon-llama-7b-v0", "display_name": "Dragon-Llama-7B", + {"model_name": "llmware/dragon-llama-7b-v0", "display_name": "dragon-llama-7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-llama-7b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-llama-7b-v0"}, - {"model_name": "llmware/dragon-deci-7b-v0", "display_name": "Dragon-Deci-7B", + {"model_name": "llmware/dragon-deci-7b-v0", "display_name": "dragon-deci-7b", "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/dragon-deci-7b-v0", - "custom_model_files": [], "custom_model_repo": ""}, + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/dragon-deci-7b-v0"}, # gguf models - {"model_name": "llmware/dragon-mistral-7b-gguf", "display_name": "Dragon-Mistral-7B-GGUF", + + # deprecated access to dragon-mistral-7b-gguf -> replaced by dragon-mistral-answer-tool + {"model_name": "llmware/dragon-mistral-7b-gguf", "display_name": "dragon-mistral-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "", @@ -252,7 +388,8 @@ "link": "https://huggingface.co/llmware/dragon-mistral-7b-v0", "custom_model_files": ["dragon-mistral-7b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-mistral-7b-v0"}, - {"model_name": "llmware/dragon-llama-7b-gguf", "display_name": "Dragon-Llama-7B-GGUF", + # deprecated access to dragon-llama-7b-gguf -> replaced by dragon-llama-answer-tool + {"model_name": "llmware/dragon-llama-7b-gguf", "display_name": "dragon-llama-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 
0.3, "trailing_space": "", @@ -261,7 +398,8 @@ "link": "https://huggingface.co/llmware/dragon-llama-7b-v0", "custom_model_files": ["dragon-llama-7b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-llama-7b-v0"}, - {"model_name": "llmware/dragon-yi-6b-gguf", "display_name": "Dragon-Yi-6B-GGUF", + # deprecated access to dragon-yi-6b-gguf -> replaced by dragon-yi-answer-tool + {"model_name": "llmware/dragon-yi-6b-gguf", "display_name": "dragon-yi-6b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", "temperature": 0.3, "trailing_space": "\n", @@ -270,43 +408,436 @@ "link": "https://huggingface.co/llmware/dragon-yi-6b-v0", "custom_model_files": ["dragon-yi-6b-q4_k_m.gguf"], "custom_model_repo": "llmware/dragon-yi-6b-v0"}, + {"model_name": "dragon-yi-answer-tool", "display_name": "dragon-yi-6b-answer-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "\n", + "gguf_file": "dragon-yi.gguf", + "gguf_repo": "llmware/dragon-yi-answer-tool", + "snapshot": True, + "link": "https://huggingface.co/llmware/dragon-yi-answer-tool", + "custom_model_files": ["dragon-yi.gguf"], "custom_model_repo": "llmware/dragon-yi-answer-tool"}, + + {"model_name": "dragon-llama-answer-tool", "display_name": "dragon-llama-answer-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "dragon-llama.gguf", + "gguf_repo": "llmware/dragon-llama-answer-tool", + "snapshot": True, + "link": "https://huggingface.co/llmware/dragon-llama-answer-tool", + "custom_model_files": ["dragon-llama.gguf"], "custom_model_repo": "llmware/dragon-llama-answer-tool"}, + + {"model_name": "dragon-mistral-answer-tool", "display_name": "dragon-mistral-answer-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "dragon-mistral.gguf", + "gguf_repo": "llmware/dragon-mistral-answer-tool", + "snapshot": True, + "link": "https://huggingface.co/llmware/dragon-mistral-answer-tool", + "custom_model_files": ["dragon-mistral.gguf"], "custom_model_repo": "llmware/dragon-mistral-answer-tool"}, + # selected top HF open source chat models - gguf - {"model_name": "TheBloke/Llama-2-7B-Chat-GGUF", "display_name": "Llama-2-7B-Chat-GGUF", + {"model_name": "TheBloke/Llama-2-7B-Chat-GGUF", "display_name": "llama-2-7b-chat-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "", + "context_window": 2048, "instruction_following": True, "prompt_wrapper": "", "temperature": 0.3, "trailing_space": "", "gguf_file": "llama-2-7b-chat.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", "custom_model_files": ["llama-2-7b-chat.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, - {"model_name": 
"TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", "display_name": "OpenHermes-Mistral-7B-GGUF", + {"model_name": "TheBloke/OpenHermes-2.5-Mistral-7B-GGUF", "display_name": "openhermes-mistral-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "chat_ml", + "context_window": 2048, "instruction_following": True, "prompt_wrapper": "chat_ml", "temperature": 0.3, "trailing_space": "", "gguf_file": "openhermes-2.5-mistral-7b.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", "custom_model_files": ["openhermes-2.5-mistral-7b.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, - {"model_name": "TheBloke/zephyr-7B-beta-GGUF", "display_name": "Zephyr-7B-GGUF", + {"model_name": "TheBloke/zephyr-7B-beta-GGUF", "display_name": "zephyr-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "hf_chat", + "context_window": 2048, "instruction_following": True, "prompt_wrapper": "hf_chat", "temperature": 0.3, "trailing_space": "", "gguf_file": "zephyr-7b-beta.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", "custom_model_files": ["zephyr-7b-beta.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon"}, - {"model_name": "TheBloke/Starling-LM-7B-alpha-GGUF", "display_name": "Berkeley-Starling-7B-GGUF", + {"model_name": "TheBloke/Starling-LM-7B-alpha-GGUF", "display_name": "starling-7b-gguf", "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", - "context_window": 2048, "instruction_following": False, "prompt_wrapper": "open_chat", + "context_window": 2048, "instruction_following": True, "prompt_wrapper": "open_chat", "temperature": 0.3, "trailing_space": "", "gguf_file": "starling-lm-7b-alpha.Q4_K_M.gguf", "gguf_repo": "llmware/bonchon", "link": "https://huggingface.co/llmware/bonchon", "custom_model_files": ["starling-lm-7b-alpha.Q4_K_M.gguf"], "custom_model_repo": "llmware/bonchon" - } + }, + + # new slim models + {"model_name": "slim-ner-tool", "display_name": "slim-ner-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-ner.gguf", + "gguf_repo": "llmware/slim-ner-tool", + "link": "https://huggingface.co/llmware/slim-ner-tool", + "custom_model_files": ["slim-ner.gguf"], "custom_model_repo": "llmware/slim-ner-tool", + # add function call parameters + "function_call": True, + "primary_keys": ["people", "location", "organization", "misc"], + "fc_output_values": [], + "tokenizer": "llmware/slim-ner", + "value_zone_markers": {"start": [6024,6796, 3366], "stop": [2033,3108]}, + "marker_tokens": [], "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-sentiment-tool", "display_name": "slim-sentiment-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-sentiment.gguf", + "gguf_repo": 
"llmware/slim-sentiment-tool", + "link": "https://huggingface.co/llmware/slim-sentiment-tool", + "custom_model_files": ["slim-sentiment.gguf"], "custom_model_repo": "llmware/slim-sentiment-tool", + # add function call parameters + "function_call": True, + "primary_keys": ["sentiment"], + "fc_output_values": ["positive", "neutral", "negative"], + "tokenizer": "llmware/slim-sentiment", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [1066, 22198, 17821], + "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-emotions-tool", "display_name": "slim-emotions-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-emotions.gguf", + "gguf_repo": "llmware/slim-emotions-tool", + "link": "https://huggingface.co/llmware/slim-emotions-tool", + "custom_model_files": ["slim-emotions.gguf"], "custom_model_repo": "llmware/slim-emotions-tool", + # add function call parameters + "function_call": True, + "primary_keys": ["emotions"], + "fc_output_values": ["afraid", "anger", "angry", "annoyed", "anticipating", "anxious", "apprehensive", + "ashamed", "caring", "confident", "content", "devastated", "disappointed", "disgusted", + "embarrassed", "excited", "faithful", "fear", "furious", "grateful", "guilty", + "hopeful", "impressed", "jealous", "joy", "joyful", "lonely", "love", "nostalgic", + "prepared", "proud", "sad", "sadness", "sentimental", "surprise", "surprised", + "terrified", "trusting"], + "tokenizer": "llmware/slim-emotions", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-ratings-tool", "display_name": "slim-ratings-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-ratings.gguf", + "gguf_repo": "llmware/slim-ratings-tool", + "link": "https://huggingface.co/llmware/slim-ratings-tool", + "custom_model_files": ["slim-ratings.gguf"], "custom_model_repo": "llmware/slim-ratings-tool", + # add function call parameters + "function_call": True, + "primary_keys": ["rating"], + "fc_output_values": ["1", "2", "3", "4", "5"], + "tokenizer": "llmware/slim-ratings", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-intent-tool", "display_name": "slim-intent-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-intent.gguf", + "gguf_repo": "llmware/slim-intent-tool", + "link": "https://huggingface.co/llmware/slim-intent-tool", + "custom_model_files": ["slim-intent.gguf"], "custom_model_repo": "llmware/slim-intent-tool", + "function_call": True, + "primary_keys": ["intent"], + "fc_output_values": ["account", "cancel", 
"complaint", "customer service", "delivery", "feedback", + "invoice", "new account", "order", "payments", "refund", "shipping", + "subscription", "terminate"], + "tokenizer": "llmware/slim-intent", + "value_zone_markers": {"start": [6024, 6796, 3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-nli-tool", "display_name": "slim-nli-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-nli.gguf", + "gguf_repo": "llmware/slim-nli-tool", + "link": "https://huggingface.co/llmware/slim-nli-tool", + "custom_model_files": ["slim-nli.gguf"], "custom_model_repo": "llmware/slim-nli-tool", + "function_call": True, + "primary_keys": ["evidence"], + "fc_output_values": ["supports", "neutral", "contradicts"], + "tokenizer": "llmware/slim-nli", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [9996,5924,17821], + "marker_token_lookup": {9996: "contradicts", 5924: "supports", 17821: "neutral"}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-topics-tool", "display_name": "slim-topics-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-topics.gguf", + "gguf_repo": "llmware/slim-topics-tool", + "link": "https://huggingface.co/llmware/slim-topics-tool", + "custom_model_files": ["slim-topics.gguf"], "custom_model_repo": "llmware/slim-topics-tool", + "function_call": True, + "primary_keys": ["topics"], + "fc_output_values": [], + "tokenizer": "llmware/slim-topics", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-tags-tool", "display_name": "slim-tags-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-tags.gguf", + "gguf_repo": "llmware/slim-tags-tool", + "link": "https://huggingface.co/llmware/slim-tags-tool", + "custom_model_files": ["slim-tags.gguf"], "custom_model_repo": "llmware/slim-tags-tool", + "function_call": True, + "primary_keys": ["tags"], + "fc_output_values": [], + "tokenizer": "llmware/slim-tags", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + {"model_name": "slim-sql-tool", "display_name": "slim-sql-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-sql.gguf", + "gguf_repo": "llmware/slim-sql-tool", + "fc_output_values": [], + "link": "https://huggingface.co/llmware/slim-sql-tool", + "custom_model_files": ["slim-sql.gguf"], "custom_model_repo": "llmware/slim-sql-tool", 
+ "tokenizer": "llmware/slim-sql-1b-v0", + "snapshot": True}, + + {"model_name": "bling-answer-tool", "display_name": "bling-answer-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "bling-answer.gguf", + "gguf_repo": "llmware/bling-answer-tool", + "link": "https://huggingface.co/llmware/bling-answer-tool", + "custom_model_files": ["bling-answer.gguf"], "custom_model_repo": "llmware/bling-answer-tool", + # add function call parameters + "tokenizer": "llmware/bling-tiny-llama-1b-v0", + "snapshot": True}, + + {"model_name": "slim-category-tool", "display_name": "slim-category-tool", + "model_family": "GGUFGenerativeModel", "model_category": "generative_local", "model_location": "llmware_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", + "gguf_file": "slim-category.gguf", + "gguf_repo": "llmware/slim-category-tool", + "link": "https://huggingface.co/llmware/slim-category-tool", + "custom_model_files": ["slim-category.gguf"], "custom_model_repo": "llmware/slim-category-tool", + "function_call": True, + "primary_keys": ["category"], + "fc_output_values": ["analyst", "announcements", "bonds", "business", "central bank", "commentary", + "commodities", "currencies", "dividend", "earnings", "energy", "entertainment", + "financials", "health", "human resources", "legal and regulation", "macroeconomics", + "markets", "mergers and acquisitions", "opinion", "politics", "public markets", + "science", "sports", "stocks", "tech", "world"], + "tokenizer": "llmware/slim-category", + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"], + "snapshot": True}, + + # pytorch slim models start here + + {"model_name": "llmware/slim-intent", "display_name": "slim-intent-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-intent", + "hf_repo": "llmware/slim-intent", + "custom_model_files": [""], "custom_model_repo": "", + "function_call": True, + "primary_keys": ["intent"], + "fc_output_values": ["account", "cancel", "complaint", "customer service", "delivery", "feedback", + "invoice", "new account", "order", "payments", "refund", "shipping", + "subscription", "terminate"], + "function": ["classify"], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [1066, 22198, 17821], + "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, + }, + + {"model_name": "llmware/slim-sentiment", "display_name": "slim-sentiment-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-sentiment", + "hf_repo": "llmware/slim-sentiment", + "custom_model_files": [""], "custom_model_repo": "", + "function_call": True, + "primary_keys": 
["sentiment"], + "fc_output_values": ["positive", "neutral", "negative"], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [1066, 22198, 17821], + "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, + "function": ["classify"]}, + + {"model_name": "llmware/slim-emotions", "display_name": "slim-emotions-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-emotions", + "hf_repo": "llmware/slim-emotions", + "custom_model_files": [""], "custom_model_repo": "", + "function_call": True, + "primary_keys": ["emotions"], + "fc_output_values": ["afraid", "anger", "angry", "annoyed", "anticipating", "anxious", "apprehensive", + "ashamed", "caring", "confident", "content", "devastated", "disappointed", "disgusted", + "embarrassed", "excited", "faithful", "fear", "furious", "grateful", "guilty", + "hopeful", "impressed", "jealous", "joy", "joyful", "lonely", "love", "nostalgic", + "prepared", "proud", "sad", "sadness", "sentimental", "surprise", "surprised", + "terrified", "trusting"], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [1066, 22198, 17821], + "marker_token_lookup": {1066: "positive", 22198: "negative", 17821: "neutral"}, + "function": ["classify"]}, + + {"model_name": "llmware/slim-ner", "display_name": "slim-ner-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-ner", + "custom_model_files": [""], "custom_model_repo": "", + "hf_repo": "llmware/slim-ner", + "function_call": True, + "primary_keys": ["person", "organization", "place", "misc"], + "fc_output_values": [], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"]}, + + {"model_name": "llmware/slim-nli", "display_name": "slim-nli-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-nli", + "custom_model_files": [""], "custom_model_repo": "", + "hf_repo": "llmware/slim-nli", + "function_call": True, + "primary_keys": ["evidence"], + "fc_output_values": ["supports", "neutral", "contradicts"], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"]}, + + {"model_name": "llmware/slim-ratings", "display_name": "slim-ratings-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-ratings", + "hf_repo": "llmware/slim-ratings", + "custom_model_files": [""], 
"custom_model_repo": "", + "function_call": True, + "primary_keys": ["rating"], + "fc_output_values": ["1", "2", "3", "4", "5"], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"]}, + + {"model_name": "llmware/slim-category", "display_name": "slim-category-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-category", + "custom_model_files": [""], "custom_model_repo": "", + "hf_repo": "llmware/slim-category", + "function_call": True, + "primary_keys": ["category"], + "fc_output_values": ["analyst", "announcements", "bonds", "business", "central bank", "commentary", + "commodities", "currencies", "dividend", "earnings", "energy", "entertainment", + "financials", "health", "human resources", "legal and regulation", "macroeconomics", + "markets", "mergers and acquisitions", "opinion", "politics", "public markets", + "science", "sports", "stocks", "tech", "world"], + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "function": ["classify"]}, + + {"model_name": "llmware/slim-tags", "display_name": "slim-tags-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-tags", + "custom_model_files": [""], "custom_model_repo": "", + "hf_repo": "llmware/slim-tags", + "function_call": True, + "value_zone_markers": {"start": [6024, 6796,3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "primary_keys": ["tags"], + "fc_output_values": [], + "function": ["classify"]}, + + {"model_name": "llmware/slim-topics", "display_name": "slim-topics-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "gguf_file": "", "gguf_repo": "", + "link": "https://huggingface.co/llmware/slim-topics", + "hf_repo": "llmware/slim-topics", + "custom_model_files": [""], "custom_model_repo": "", + "function_call": True, + "value_zone_markers": {"start": [6024, 6796, 3366], "stop": [2033, 3108]}, + "marker_tokens": [], + "marker_token_lookup": {}, + "primary_keys": ["topics"], + "fc_output_values": [], + "function": ["classify"]}, + + # sql pytorch model + {"model_name": "llmware/slim-sql-1b-v0", "display_name": "slim-sql-1b", + "model_family": "HFGenerativeModel", "model_category": "generative_local", "model_location": "hf_repo", + "context_window": 2048, "instruction_following": False, "prompt_wrapper": "human_bot", + "temperature": 0.3, "trailing_space": "", "link": "https://huggingface.co/llmware/slim-sql-1b-v0", + "custom_model_files": [], "custom_model_repo": "", + "hf_repo": "llmware/slim-sql-1b-v0", + #TODO: assess how to handle SQL models with function call parameters + "function_call": False, + "fc_output_values": [], + "primary_keys": ["sql"], "function": ["sql"]}, + ] """ Fine-tuning Prompt 
Wrappers - virtually all instruct fine-tuned models will have a special 'prompt wrapper' diff --git a/llmware/models.py b/llmware/models.py index c85bfdf9..5c9b0a3c 100644 --- a/llmware/models.py +++ b/llmware/models.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -13,6 +12,14 @@ # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The models module implements the model registry, the catalog for models and prompts, and all the currently +supported models, which includes the SLIM model series, the DRAGON model series, the BLING model series, +and the BERT model series. + +Besides the logic mentioned above, this module also implements the configuration for BERT and the +inference server of llmware. +""" + import logging import json import numpy as np @@ -50,10 +57,6 @@ global_default_prompt_catalog) -# api model imports -import openai, anthropic, ai21, cohere - - class _ModelRegistry: """ ModelRegistry class is wrapper class around the global_model_repo_catalog_list for easy dynamic updating """ @@ -79,6 +82,22 @@ class _ModelRegistry: prompt_wrappers = ["alpaca", "human_bot", "chatgpt", "", "open_chat", "hf_chat", "chat_ml"] registered_wrappers = global_model_finetuning_prompt_wrappers_lookup + # list of function calling classifier tools + + llm_fx_tools = ["ner", "sentiment", "topics", "ratings", "emotions", "nli", + "intent", "sql", "answer", "category", "tags"] + + llm_fx_tools_map = {"ner": "slim-ner-tool", + "sentiment": "slim-sentiment-tool", + "topics": "slim-topics-tool", + "ratings": "slim-ratings-tool", + "emotions": "slim-emotions-tool", + "nli": "slim-nli-tool", + "sql": "slim-sql-tool", + "tags": "slim-tags-tool", + "answer": "bling-answer-tool", + "category": "slim-category-tool", + "intent": "slim-intent-tool"} @classmethod def get_model_list(cls): """ List current view of registered models """ @@ -89,6 +108,16 @@ def get_wrapper_list(cls): """ List current registered wrapper formats """ return cls.registered_wrappers + @classmethod + def get_llm_fx_tools_list (cls): + """ List of function calling model tools available """ + return cls.llm_fx_tools + + @classmethod + def get_llm_fx_mapping (cls): + """ List of function calling model tools to repo name """ + return cls.llm_fx_tools_map + @classmethod def add_wrapper(cls, wrapper_name, wrapper_dict): @@ -154,7 +183,8 @@ def update_model(cls, model_name_lookup, new_model_card_dict): updated=False for i, models in enumerate(cls.registered_models): - if models["model_name"] == model_name_lookup: + # added option to match with display name + if models["model_name"] == model_name_lookup or models["display_name"] == model_name_lookup: del cls.registered_models[i] cls.registered_models.append(new_model_card_dict) updated = True @@ -169,7 +199,8 @@ def delete_model(cls, model_name): model_found=False for i, models in enumerate(cls.registered_models): - if models["model_name"] == model_name: + # added option to match with display name + if models["model_name"] == model_name or models["display_name"] == model_name: del cls.registered_models[i] model_found = True break @@ -302,7 +333,7 @@ def register_sentence_transformer_model(self, model_name, embedding_dims, contex def register_gguf_model(self, model_name, gguf_model_repo, gguf_model_file_name, prompt_wrapper=None, eos_token_id=0, display_name=None,trailing_space="", temperature=0.3, - context_window=2048, instruction_following=False): + 
context_window=2048, instruction_following=True): """ Registers a new GGUF model in model catalog - alternative to adding directly in the ModelRegistry """ @@ -430,7 +461,8 @@ def lookup_model_card (self, selected_model_name): # first check in the global_model_repo + confirm location for models in self.global_model_list: - if models["model_name"] == selected_model_name: + # add option to match with display_name as alternative alias for model + if models["model_name"] == selected_model_name or models["display_name"] == selected_model_name: model_card = models model_card.update({"standard":True}) break @@ -441,7 +473,7 @@ def lookup_model_card (self, selected_model_name): return model_card - def locate_and_retrieve_model_bits (self, model_card): + def locate_and_retrieve_model_bits (self, model_card, api_key=None): """ For models requiring instantiation locally, this utility method retrieves the model bits using the instructions provided in the model card entry. """ @@ -479,10 +511,18 @@ def locate_and_retrieve_model_bits (self, model_card): CloudBucketManager().pull_single_model_from_llmware_public_repo(model_folder_name) else: + # GGUF models pulled directly from HF repos logging.info("update: pulling GGUF model from HF - %s - %s", model_location, model_card) - self.pull_model_from_hf(model_card, model_location) + if "snapshot" in model_card: + # pull snapshot from gguf repo in model card + model_repo = model_card["gguf_repo"] + # replacing: model_repo = model_card["model_name"] + self.pull_snapshot_from_hf(model_repo, model_location, api_key=api_key) + else: + # general case + self.pull_model_from_hf(model_card, model_location, api_key=api_key) logging.info("update: ModelCatalog - done pulling model into local folder - %s ", model_location) @@ -548,15 +588,18 @@ def _instantiate_model_class_from_string(self, model_class, model_name, model_ca if model_class == "OpenAIEmbeddingModel": my_model = OpenAIEmbeddingModel(model_name=model_name, embedding_dims=embedding_dims, - api_key=api_key) + api_key=api_key, + model_card=model_card) if model_class == "CohereEmbeddingModel": my_model = CohereEmbeddingModel(model_name=model_name, embedding_dims=embedding_dims, - api_key=api_key) + api_key=api_key, + model_card=model_card) if model_class == "GoogleEmbeddingModel": my_model = GoogleEmbeddingModel(model_name=model_name, embedding_dims=embedding_dims, - api_key=api_key) + api_key=api_key, + model_card=model_card) if model_class == "LLMWareSemanticModel": my_model = LLMWareSemanticModel(model_name=model_name, embedding_dims=embedding_dims, @@ -607,7 +650,9 @@ def _instantiate_model_class_from_string(self, model_class, model_name, model_ca if model_class == "HFEmbeddingModel": my_model = HFEmbeddingModel(model_name=model_name, api_key=api_key, - model_card=model_card) + embedding_dims=embedding_dims, + model_card=model_card, + trust_remote_code=True) return my_model @@ -633,7 +678,7 @@ def load_model (self, selected_model, api_key=None): # step 3- if physical model, then find the location on local server, and if not available, then pull from s3 if model_card["model_location"] == "llmware_repo": - loading_directions = self.locate_and_retrieve_model_bits(model_card) + loading_directions = self.locate_and_retrieve_model_bits(model_card, api_key=api_key) my_model = my_model.load_model_for_inference(loading_directions, model_card=model_card) else: # if api_key passed, save as environ variable @@ -674,11 +719,11 @@ def load_sentence_transformer_model(self,model, model_name): model = 
LLMWareSemanticModel(model=model,model_name=model_name) return model - def load_hf_embedding_model(self, model, tokenizer): + def load_hf_embedding_model(self, model, tokenizer,trust_remote_code=False): """ Loads and integrates a Huggingface embedding model """ - model = HFEmbeddingModel(model, tokenizer) + model = HFEmbeddingModel(model, tokenizer, trust_remote_code=trust_remote_code) return model def load_hf_generative_model(self, model,tokenizer,prompt_wrapper=None, @@ -706,7 +751,7 @@ def load_embedding_model (self, model_name=None, # first, check for 'from_hf' flag and load as HuggingFace model if from_hf: - loaded_model = ModelCatalog().load_hf_embedding_model(model,tokenizer) + loaded_model = ModelCatalog().load_hf_embedding_model(model,tokenizer, trust_remote_code=True) else: # second, check for 'from_sentence_transformer' flag and load as SBERT model if from_sentence_transformers: @@ -797,7 +842,8 @@ def model_lookup(self,model_name): my_model = None for models in self.global_model_list: - if models["model_name"] == model_name: + # add check for match with display_name as alias + if models["model_name"] == model_name or models["display_name"] == model_name: my_model = models break @@ -811,7 +857,8 @@ def get_model_by_name(self, model_name, api_key=None): for models in self.global_model_list: - if models["model_name"] == model_name: + # add check for display name match + if models["model_name"] == model_name or models["display_name"] == model_name: selected_model = models my_model = self._instantiate_model_class_from_string(selected_model["model_family"], model_name, models,api_key=api_key) @@ -846,19 +893,367 @@ def pull_model_from_hf(self, model_card, local_model_repo_path, api_key=None): return local_model_repo_path - def pull_snapshot_from_hf(self, model_name, local_model_repo_path, api_key=None): + def pull_snapshot_from_hf(self, repo_name, local_model_repo_path, api_key=None): """ Pulls snapshot of HF model repository and saves into local folder path. """ from huggingface_hub import snapshot_download - model_name = "llmware/" + model_name - - snapshot = snapshot_download(model_name, local_dir=local_model_repo_path, token=api_key, + snapshot = snapshot_download(repo_name, local_dir=local_model_repo_path, token=api_key, local_dir_use_symlinks=False) return local_model_repo_path + def get_llm_toolkit(self, tool_list=None, api_key=None): + + """ Caches all SLIM tools by default, or if list provided, then selected tools only. 
""" + + model_repo_path = LLMWareConfig.get_model_repo_path() + + if not os.path.exists(model_repo_path): + os.makedirs(model_repo_path) + + if not tool_list: + tool_list = _ModelRegistry().get_llm_fx_tools_list() + + logging.info("update: ModelCatalog - get_toolset - %s ", tool_list) + + for tool in tool_list: + + tool_name = _ModelRegistry().get_llm_fx_mapping()[tool] + + logging.info("update: ModelCatalog - get_toolset - %s - %s", tool, tool_name) + + found_model = False + local_model_repo_path = os.path.join(model_repo_path, tool_name) + + if os.path.exists(local_model_repo_path): + model_parts_in_folder = os.listdir(local_model_repo_path) + if len(model_parts_in_folder) > 0: + found_model = True + + if not found_model: + + model_card = self.lookup_model_card(tool_name) + if "gguf_repo" in model_card: + repo_name = model_card["gguf_repo"] + else: + repo_name = tool_name + + self.pull_snapshot_from_hf(repo_name, local_model_repo_path, api_key=api_key) + + return 0 + + def list_llm_tools(self): + """Provides a list of the currently available SLIM tools available in the catalog. """ + return _ModelRegistry().get_llm_fx_tools_list() + + def get_llm_fx_mapping(self): + """Provides a current mapping of Tools to LLM Function Call - this mapping is used by LLMfx class to + orchestrate among multiple models deployed locally as tools. """ + return _ModelRegistry().get_llm_fx_mapping() + + def get_test_script(self, model_name): + + """ Checks if a test script is available with the model repo - and if so, + retrieves the test set as a json dictionary """ + + test_set = None + + model_repo_path = LLMWareConfig().get_model_repo_path() + local_model_path = os.path.join(model_repo_path, model_name) + if os.path.exists(local_model_path): + model_files = os.listdir(local_model_path) + if "config.json" in model_files: + config_json = json.load(open(os.path.join(local_model_path, "config.json"), "r", + encoding="utf-8")) + if "test_set" in config_json: + test_set = config_json["test_set"] + + return test_set + + def tool_test_run(self, model_name, api_key=None, verbose=False): + + """ Loads a tool, if required, and executes a series of test runs. + Note: only available for 'tool' implementation models. 
""" + + model_card = self.lookup_model_card(model_name) + + if not model_card: + raise ModelNotFoundException(model_name) + + if "snapshot" in model_card: + + model = self.load_model(model_name, api_key=api_key) + test_set = self.get_test_script(model_name) + + if test_set: + + if "function_call" not in model_card: + + # run traditional inference on test set + print("\nTest: ", model_name) + + for i, entries in enumerate(test_set): + + print("\nupdate: query - ", i, entries["query"]) + + response = model.inference(entries["query"],add_context=entries["context"], + add_prompt_engineering="default_with_context") + print("update: llm_response - ", i, response["llm_response"]) + if "answer" in entries: + print("update: gold answer - ", i, entries["answer"]) + + else: + + print("\nTest: ", model_name) + + for i, entries in enumerate(test_set): + + text = entries["context"] + + # special case for nli + if "conclusion" in entries: + text = "Evidence: " + text + "\nConclusion: " + entries["conclusion"] + + response = model.function_call(text, get_logits=True) + + # if verbose: + print(f"\nupdate: context - test - {i} - {text}") + + print("update: 'llm_response' - test - ", i, response["llm_response"]) + + # print("update: 'output_tokens' - test - ", i, response["output_tokens"]) + + logit_analysis = self.logit_analysis(response, model_card, model.hf_tokenizer_name, + api_key=api_key) + + if "ryg_string" in logit_analysis: + print("update: red-yellow-green confidence - ", logit_analysis["ryg_string"]) + + if "confidence_score" in logit_analysis: + print("update: confidence score - ", logit_analysis["confidence_score"]) + + if "marker_tokens" in logit_analysis: + if logit_analysis["marker_tokens"]: + print("update: marker tokens - ", logit_analysis["marker_tokens"]) + + if "choices" in logit_analysis: + choices = logit_analysis["choices"] + if len(choices) > 0: + choices = choices[0] + + print("update: choices - ", choices) + + """ + for keys, values in logit_analysis.items(): + print("update: logit analysis - ", keys, values) + """ + + return 0 + + def list_function_call_models(self): + + """ Returns a list of model card dictionaries for models that implement function_calls.""" + + fc_model_list = [] + for models in self.global_model_list: + if "function_call" in models: + # confirm that value is positive + if models["function_call"]: + fc_model_list.append(models) + + return fc_model_list + + def logit_analysis(self, response, model_card, hf_tokenizer_name,api_key=None): + + """ Analyzes logits from llm response - currently exposed only as option for function + call inferences in HFGenerative and GGUFGenerative models. 
""" + + logit_analysis = [] + ryg_string = "" + vz_choices = [] + marker_token_probs = [] + low_confidence_choices = [] + confidence_score = -1 + + # only go ahead if logits found in response + if "logits" not in response: + logging.warning("update: logit_analysis requires a response dictionary with 'logits' key- skipping") + return logit_analysis + + try: + from colorama import Fore + red = Fore.RED + green = Fore.GREEN + yellow = Fore.YELLOW + color_reset = Fore.RESET + except: + logging.warning("update: logit analysis - could not import colorama - please import to see color coded" + "visualization of the output string confidence level.") + + # setting color inserts to empty + red = "" + green = "" + yellow = "" + color_reset = "" + + try: + # tokenizer used as part of building confidence level string + from transformers import AutoTokenizer + except: + raise DependencyNotInstalledException("transformers") + + """ Analyzes logits from llm response """ + + # value zone markers + vz_start = [] + vz_stop = [] + + if "value_zone_markers" in model_card: + vz_start = model_card["value_zone_markers"]["start"] + vz_stop = model_card["value_zone_markers"]["stop"] + + # marker tokens for sentiment analysis + marker_tokens = [] + marker_token_lookup = {} + + if "marker_tokens" in model_card: + marker_tokens = model_card["marker_tokens"] + if "marker_token_lookup" in model_card: + marker_token_lookup = model_card["marker_token_lookup"] + + if "logits" in response: + + logits = response["logits"] + + # hf tokenizer name + tokenizer = AutoTokenizer.from_pretrained(hf_tokenizer_name, token=api_key) + + ryg_string = "" + + token_probs = [] + marker_token_probs = [] + vz_choices = [] + vz_capture_on = False + + for i, toks in enumerate(response["output_tokens"]): + + if toks in vz_stop: + vz_capture_on = False + + if toks in marker_tokens: + + for x in range(0, len(logits[i])): + if logits[i][x][0] in marker_tokens: + new_entry = (marker_token_lookup[logits[i][x][0]], + logits[i][x][0], + logits[i][x][1]) + marker_token_probs.append(new_entry) + + if vz_capture_on: + + new_entry = {} + for x in range(0,3): + key = "choice_" + str(x+1) + new_entry.update({key: [tokenizer.decode(logits[i][x][0]), + logits[i][x][1],logits[i][x][0]]}) + + # set confidence score as normalized logit value of first token in value zone + #TODO: need to assess whether averaging across multiple tokens more effective + + if len(vz_choices) == 0: + if logits[i][x][0] == toks: + confidence_score = logits[i][x][1] + + vz_choices.append(new_entry) + + if toks in vz_start: + vz_capture_on = True + + if toks == 2: + break + + for x in range(0, len(logits[i])): + + if toks == logits[i][x][0]: + + token_probs.append(logits[i][x][1]) + + if logits[i][x][1] > 0.70: + ryg_string += green + tokenizer.decode([1, logits[i][x][0]]) + + if 0.3 <= logits[i][x][1] <= 0.70: + ryg_string += yellow + tokenizer.decode([1, logits[i][x][0]]) + + new_entry = {} + for y in range(0, 3): + key = "choice_" + str(y + 1) + new_entry.update({key: [tokenizer.decode(logits[i][y][0]), + logits[i][y][1], logits[i][y][0]]}) + + low_confidence_choices.append(new_entry) + + if logits[i][x][1] < 0.3: + ryg_string += red + tokenizer.decode([1, logits[i][x][0]]) + + new_entry = {} + for y in range(0, 3): + key = "choice_" + str(y + 1) + new_entry.update({key: [tokenizer.decode(logits[i][y][0]), + logits[i][y][1], logits[i][y][0]]}) + + low_confidence_choices.append(new_entry) + + ryg_string = ryg_string.replace("", "") + + logit_analysis = {"ryg_string": ryg_string + 
color_reset, "choices": vz_choices, + "marker_tokens": marker_token_probs, + "low_confidence_choices": low_confidence_choices, + "confidence_score": confidence_score} + + return logit_analysis + + def fc_output_values(self, model_name): + + """ Takes as input a model_name, and if the model is function-calling, then will output a list + of the expected function calling output values for the model. If no value provided, or no specific + expected 'constraints' on output values, then returns an empty list. """ + + output_values = [] + + model_card = self.lookup_model_card(model_name) + + if model_card: + if "fc_output_values" in model_card: + output_values = model_card["fc_output_values"] + + else: + logging.error("error: ModelCatalog - could not identify model card for selected model - %s ", model_name) + + raise ModelNotFoundException(model_name) + + return output_values + + def fc_primary_keys(self, model_name): + + """ Takes as input a model_name, and if the model is function-calling, then will output a list of the + primary keys, if any, to be passed as parameters to the model. If no primary keys, then returns an + empty list. """ + + output_keys = [] + + model_card = self.lookup_model_card(model_name) + + if model_card: + if "primary_keys" in model_card: + output_keys = model_card["primary_keys"] + else: + logging.error("error: ModelCatalog - could not identify model card for selected model - %s ", model_name) + + raise ModelNotFoundException(model_name) + + return output_keys class PromptCatalog: @@ -1261,10 +1656,17 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer # expect that .api_base will route to local open chat inference server # -- assumed that *** api_key likely not used *** + # -- in openai >= 1.0: .api_base replaced with 'base_url' attribute + + try: + from openai import OpenAI + except ImportError: + raise DependencyNotInstalledException("openai >= 1.0") + if not self.api_key: - openai.api_key = "not-used" + client = OpenAI(api_key="not-used",base_url=self.api_base) else: - openai.api_key = self.api_key + client = OpenAI(api_key=self.api_key,base_url=self.api_base) # default case - pass the prompt received without change prompt_enriched = prompt @@ -1272,24 +1674,20 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer usage = {} time_start = time.time() - # save current state of openai.api_base - openai_api_base_entering_state = openai.api_base - - # set api_base based on configs - openai.api_base = self.api_base - try: if self.model_type == "chat": messages = self.prompt_engineer_chat(prompt_enriched, self.add_context, inference_dict) - response = openai.ChatCompletion.create(model=self.model_name,messages=messages, - max_tokens=self.target_requested_output_tokens) + # using openai >1.0 api -> create client object, and output is pydantic, not dicts + + response = client.chat.completions.create(model=self.model_name,messages=messages, + max_tokens=self.target_requested_output_tokens) """ assume 'minimal' api output conformance with OpenAI """ - text_out = response["choices"][0]["message"]["content"] + text_out = response.choices[0].message.content """ note: some openchat api do not support providing usage output consistent with OpenAI API """ @@ -1299,16 +1697,16 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer """ best effort to gather usage data if conforms with OpenAI """ - if "usage" in response: + if hasattr(response, "usage"): - if "prompt_tokens" in response["usage"]: 
- pt = response["usage"]["prompt_tokens"] + if hasattr(response.usage, "prompt_tokens"): + pt = response.usage.prompt_tokens - if "completion_tokens" in response["usage"]: - ct = response["usage"]["completion_tokens"] + if hasattr(response.usage, "completion_tokens"): + ct = response.usage.completion_tokens - if "total_tokens" in response["usage"]: - tt = response["usage"]["total_tokens"] + if hasattr(response.usage, "total_tokens"): + tt = response.usage.total_tokens usage = {"input": pt, "output": ct, @@ -1328,13 +1726,13 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer text_prompt = prompt_final + self.separator - response = openai.Completion.create(model=self.model_name, prompt=text_prompt, - temperature=self.temperature, - max_tokens=self.target_requested_output_tokens) + response = client.completions.create(model=self.model_name, prompt=text_prompt, + temperature=self.temperature, + max_tokens=self.target_requested_output_tokens) """ assume 'minimal' api output conformance with OpenAI """ - text_out = response["choices"][0]["text"] + text_out = response.choices[0].text """ note: some openchat api do not support providing usage output consistent with OpenAI API """ @@ -1344,16 +1742,16 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer """ best effort to gather usage data if conforms with OpenAI API """ - if "usage" in response: + if hasattr(response, "usage"): - if "prompt_tokens" in response["usage"]: - pt = response["usage"]["prompt_tokens"] + if hasattr(response.usage, "prompt_tokens"): + pt = response.usage.prompt_tokens - if "completion_tokens" in response["usage"]: - ct = response["usage"]["completion_tokens"] + if hasattr(response.usage, "completion_tokens"): + ct = response.usage.completion_tokens - if "total_tokens" in response["usage"]: - tt = response["usage"]["total_tokens"] + if hasattr(response.usage, "total_tokens"): + tt = response.usage.total_tokens usage = {"input": pt, "output": ct, @@ -1369,15 +1767,11 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer logging.error("error: Open Chat model inference produced error - %s ", e) - # reset openai.api_base - openai.api_base = openai_api_base_entering_state - output_response = {"llm_response": text_out, "usage": usage} return output_response - class OllamaModel: """ OllamaModel class implements the Ollama model prompt API and is intended for use in building @@ -1761,9 +2155,11 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer # default case - pass the prompt received without change prompt_enriched = prompt - # set as default openai base - openai_api_base_entering_state = openai.api_base - openai.api_base = "https://api.openai.com/v1" + # new - change with openai v1 api + try: + from openai import OpenAI + except ImportError: + raise DependencyNotInstalledException("openai >= 1.0") usage = {} time_start = time.time() @@ -1774,42 +2170,39 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer messages = self.prompt_engineer_chatgpt3(prompt_enriched, self.add_context, inference_dict) - # different api for "chat completion" -> only applies to ChatGPT = 'gpt-3.5-turbo' - openai.api_key = self.api_key - response = openai.ChatCompletion.create(model=self.model_name,messages=messages, - max_tokens=self.target_requested_output_tokens) + # updated OpenAI client to >v1.0 API - create client, and returns pydantic objects + + client = OpenAI(api_key=self.api_key) + 
response = client.chat.completions.create(model=self.model_name,messages=messages, + max_tokens=self.target_requested_output_tokens) - text_out = response["choices"][0]["message"]["content"] + text_out = response.choices[0].message.content - usage = {"input": response["usage"]["prompt_tokens"], - "output": response["usage"]["completion_tokens"], - "total": response["usage"]["total_tokens"], + usage = {"input": response.usage.prompt_tokens, + "output": response.usage.completion_tokens, + "total": response.usage.total_tokens, "metric": "tokens", "processing_time": time.time() - time_start} - # logging.info("update: open ai response: %s ", response) - else: - # 'instruct gpt' models + # openai traditional 'instruct gpt' completion models prompt_enriched = self.prompt_engineer(prompt_enriched, self.add_context, inference_dict=inference_dict) prompt_final = prompt_enriched text_prompt = prompt_final + self.separator - logging.info("update: openai model - FINAL PROMPT: %s %s ", self.model_name, prompt_final) - openai.api_key = self.api_key - response = openai.Completion.create(model=self.model_name, prompt=text_prompt, - temperature=self.temperature, - max_tokens=self.target_requested_output_tokens) - - logging.info("update: open ai response: %s ", response["choices"]) - text_out = response["choices"][0]["text"] - # openai response "usage" dict - {"completion_tokens" | "prompt_tokens" | total_tokens"} - - usage = {"input": response["usage"]["prompt_tokens"], - "output": response["usage"]["completion_tokens"], - "total": response["usage"]["total_tokens"], + + client = OpenAI(api_key=self.api_key) + response = client.completions.create(model=self.model_name, prompt=text_prompt, + temperature=self.temperature, + max_tokens=self.target_requested_output_tokens) + + text_out = response.choices[0].text + + usage = {"input": response.usage.prompt_tokens, + "output": response.usage.completion_tokens, + "total": response.usage.total_tokens, "metric": "tokens", "processing_time": time.time() - time_start} @@ -1822,12 +2215,6 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer # raise LLMInferenceResponseException(e) logging.error("error: OpenAI model inference produced error - %s ", e) - # reset openai api_base - openai.api_base = openai_api_base_entering_state - - # will look to capture usage metadata - # "usage" = {"completion_tokens", "prompt_tokens", "total_tokens"} - output_response = {"llm_response": text_out, "usage": usage} return output_response @@ -1937,6 +2324,11 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer if not self.api_key: logging.error("error: invoking Anthropic Claude Generative model with no api_key") + try: + import anthropic + except ImportError: + raise DependencyNotInstalledException("anthropic") + client = anthropic.Client(api_key=self.api_key) # prototype prompt sample: prompt_enriched = "\n\nHuman:" + " please read the following- " + @@ -1988,13 +2380,6 @@ class GoogleGenModel: def __init__(self, model_name=None, api_key=None, context_window=8192): - try: - from vertexai.preview.language_models import TextGenerationModel, TextEmbeddingModel - from vertexai import init - import google.cloud.aiplatform as aiplatform - except: - raise DependencyNotInstalledException("google-cloud-aiplatform") - self.api_key = api_key self.model_name = model_name self.model = None @@ -2078,6 +2463,13 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer if "max_tokens" in inference_dict: 
self.target_requested_output_tokens = inference_dict["max_tokens"] + try: + from vertexai.preview.language_models import TextGenerationModel, TextEmbeddingModel + from vertexai import init + import google.cloud.aiplatform as aiplatform + except ImportError: + raise DependencyNotInstalledException("google-cloud-aiplatform") + # api_key if api_key: self.api_key = api_key @@ -2252,6 +2644,11 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer if not self.api_key: logging.error("error: invoking AI21 Jurassic model with no api_key") + try: + import ai21 + except ImportError: + raise DependencyNotInstalledException("ai21") + prompt_enriched = prompt prompt_enriched = self.prompt_engineer(prompt_enriched,self.add_context, inference_dict=inference_dict) @@ -2419,6 +2816,11 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer if not self.api_key: logging.error("error: invoking Cohere Generative model with no api_key") + try: + import cohere + except ImportError: + raise DependencyNotInstalledException("cohere") + co = cohere.Client(self.api_key) time_start = time.time() @@ -2778,23 +3180,38 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, infer class OpenAIEmbeddingModel: - """ OpenaIEmbeddingModel class implements the OpenAI API for embedding models, specifically text-ada. """ + """ OpenaIEmbeddingModel class implements the OpenAI API for embedding models. """ - def __init__(self, model_name=None, api_key=None, embedding_dims=None): + def __init__(self, model_name=None, api_key=None, embedding_dims=None, model_card=None, max_len=None): # must have elements for embedding model self.model_name = model_name self.api_key = api_key + self.model_card = model_card + self.tokenizer = None if not embedding_dims: self.embedding_dims = 1536 else: self.embedding_dims = embedding_dims - self.max_total_len = 2048 + # openai standard for embeddings is 8191 as of feb 2024 + self.max_total_len = 8191 + self.max_len = self.max_total_len + + if model_card: + if "embedding_dims" in model_card: + self.embedding_dims = model_card["embedding_dims"] + + if "context_window" in model_card: + self.max_total_len = model_card["context_window"] self.error_message = "\nUnable to connect to OpenAI. Please try again later." 
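A minimal usage sketch of the updated OpenAI embedding class, assuming a valid OpenAI API key - the model name shown is the classic ada embedding model, and any OpenAI embedding model name may be substituted:

```python
from llmware.models import OpenAIEmbeddingModel

embedder = OpenAIEmbeddingModel(model_name="text-embedding-ada-002",
                                api_key="<insert-openai-api-key>")

# returns the embedding vector for a single text sample (1536 dims by default)
vector = embedder.embedding("The agreement shall terminate on December 31, 2024.")
print(len(vector))
```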
+ if max_len: + if max_len < self.max_total_len: + self.max_len = max_len + def set_api_key(self, api_key,env_var="USER_MANAGED_OPENAI_API_KEY"): # set api_key @@ -2808,15 +3225,15 @@ def _get_api_key(self, env_var="USER_MANAGED_OPENAI_API_KEY"): self.api_key = os.environ.get(env_var) return self.api_key + def get_tokenizer(self): + self.tokenizer = Utilities().get_default_tokenizer() + return self.tokenizer + def token_counter(self, text_sample): - tokenizer = Utilities().get_default_tokenizer() - toks = tokenizer.encode(text_sample).ids - return len(toks) + return len(self.tokenizer.encode(text_sample).ids) def embedding(self, text_sample, api_key=None): - model = "text-embedding-ada-002" - if api_key: self.api_key = api_key @@ -2834,31 +3251,57 @@ def embedding(self, text_sample, api_key=None): text_prompt = [text_sample] input_len = 1 - # set as default openai base - openai_api_base_entering_state = openai.api_base - openai.api_base = "https://api.openai.com/v1" + try: + from openai import OpenAI + except ImportError: + raise DependencyNotInstalledException("openai >= 1.0") - openai.api_key = self.api_key - response = openai.Embedding.create(model=model, input=text_prompt) + # insert safety check here + safe_samples = [] + safety_buffer = 200 + if self.max_total_len < 8191: + self.max_total_len = 8191 - logging.info("update: response: %s ", response) + tokenizer = self.get_tokenizer() - if input_len == 1: - embedding = response['data'][0]['embedding'] - else: - embedding = [] - for i, entries in enumerate(response['data']): - embedding.append(response['data'][i]['embedding']) + for sample in text_prompt: + + tok_len = self.token_counter(sample) + + if tok_len < (self.max_total_len - safety_buffer): + safe_samples.append(sample) + + else: + + if len(sample) > 300: + display_sample = sample[0:300] + " ... " + else: + display_sample = sample + + logging.warning(f"warning: OpenAI Embedding - input sample len - {tok_len} > context_window size " + f"\ninput_sample - {display_sample} " + f"\n\nSample is being truncated.") - # logging.info("update: embedding only: %s ", embedding) - logging.info("update: embedding dims: %s ", len(embedding)) + tok = tokenizer.encode(sample).ids + tok = tok[0:(self.max_total_len - safety_buffer)] + sample = tokenizer.decode(tok) + safe_samples.append(sample) - # embedding = np.array(embedding) - # embedding_2d = np.expand_dims(embedding, 0) + text_prompt = safe_samples + # end - safety check - # reset global environment variable to state before the inference - # --in most cases, this will be the same, but allows for overloaded use of this var with OpenChat - openai.api_base = openai_api_base_entering_state + # update to open >v1.0 api - create client and output is pydantic objects + client = OpenAI(api_key=self.api_key) + response = client.embeddings.create(model=self.model_name, input=text_prompt) + + # logging.info("update: response: %s ", response) + + if input_len == 1: + embedding = response.data[0].embedding + else: + embedding = [] + for i, entries in enumerate(response.data): + embedding.append(response.data[i].embedding) return embedding @@ -2867,10 +3310,11 @@ class CohereEmbeddingModel: """ CohereEmbeddingModel implements the Cohere API for embedding models. 
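For completeness, a similar hedged sketch for the Cohere embedding class, assuming the cohere package is installed and a valid API key is available; the Cohere model name here is an illustrative assumption:

```python
from llmware.models import CohereEmbeddingModel

# model name below is illustrative - substitute the Cohere embedding model you have access to
embedder = CohereEmbeddingModel(model_name="embed-english-v2.0",
                                api_key="<insert-cohere-api-key>")
vector = embedder.embedding("Short passage to embed.")
```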
""" - def __init__(self, model_name = None, api_key=None, embedding_dims=None): + def __init__(self, model_name = None, api_key=None, embedding_dims=None, model_card=None,max_len=None): self.api_key = api_key self.model_name = model_name + self.model_card = model_card if not embedding_dims: self.embedding_dims = 4096 @@ -2880,6 +3324,11 @@ def __init__(self, model_name = None, api_key=None, embedding_dims=None): self.max_total_len = 2048 self.error_message = "\nUnable to connect to Cohere. Please try again later." + self.max_len = self.max_total_len + if max_len: + if max_len < self.max_total_len: + self.max_len = max_len + def set_api_key(self, api_key, env_var="USER_MANAGED_COHERE_API_KEY"): # set api_key @@ -2907,6 +3356,11 @@ def embedding(self,text_sample): if not self.api_key: logging.error("error: invoking Cohere embedding model with no api_key") + try: + import cohere + except ImportError: + raise DependencyNotInstalledException("cohere") + co = cohere.Client(self.api_key) # need safety check on length of text_sample @@ -2940,17 +3394,11 @@ class GoogleEmbeddingModel: """ GoogleEmbeddingModel implements the Google API for text embedding models. Note: to use Google models requires a separate install of the Google SDKs, e.g., vertexai and google.cloud.platform """ - def __init__(self, model_name=None, api_key=None, embedding_dims=None): - - try: - from vertexai.preview.language_models import TextGenerationModel, TextEmbeddingModel - from vertexai import init - import google.cloud.aiplatform as aiplatform - except: - raise DependencyNotInstalledException("google-cloud-aiplatform") + def __init__(self, model_name=None, api_key=None, embedding_dims=None, model_card=None, max_len=None): self.api_key = api_key self.model_name = model_name + self.model_card = model_card self.max_total_len = 3072 @@ -2963,6 +3411,11 @@ def __init__(self, model_name=None, api_key=None, embedding_dims=None): self.error_message = "\nUnable to connect to Google/Text Embedding Model. Please try again later." + self.max_len = self.max_total_len + if max_len: + if max_len < self.max_total_len: + self.max_len = max_len + def set_api_key(self, api_key, env_var="USER_MANAGED_GOOGLE_API_KEY"): # set api_key @@ -3001,6 +3454,13 @@ def embedding(self,text_sample, api_key= None): embeddings_output = [] + try: + from vertexai.preview.language_models import TextGenerationModel, TextEmbeddingModel + from vertexai import init + import google.cloud.aiplatform as aiplatform + except ImportError: + raise DependencyNotInstalledException("google-cloud-aiplatform") + try: model = TextEmbeddingModel.from_pretrained("textembedding-gecko@001") @@ -3056,7 +3516,7 @@ class HFEmbeddingModel: """HFEmbeddingModel class implements the API for HuggingFace embedding models. 
""" def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, model_card=None, - embedding_dims=None): + embedding_dims=None, trust_remote_code=False, use_gpu_if_available=True, max_len=None): # pull in expected hf input self.model_name = model_name @@ -3066,26 +3526,77 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo self.model_type = None self.max_total_len = 2048 self.model_architecture = None + self.model_card = model_card + self.safe_buffer = 12 + + # default for HF embedding model -> will be over-ridden by model card / configs, if available + self.context_window = 512 + + if self.model_card: + if "embedding_dims" in self.model_card: + self.embedding_dims = self.model_card["embedding_dims"] + + if "context_window" in self.model_card: + self.context_window = self.model_card["context_window"] + + if self.model_name and not model: + # pull from HF + try: + # will wrap in Exception if import fails and move to model catalog class + from transformers import AutoModel, AutoTokenizer + except ImportError: + raise DependencyNotInstalledException("transformers") + + hf_repo_name = self.model_name + + if not self.model_card: + self.model_card = ModelCatalog().lookup_model_card(model_name) + + if self.model_card: + if "hf_repo" in self.model_card: + hf_repo_name = self.model_card["hf_repo"] + + if api_key: + if torch.cuda.is_available(): + self.model = AutoModel.from_pretrained(hf_repo_name, token=api_key, + trust_remote_code=trust_remote_code, + torch_dtype="auto") + else: + self.model = AutoModel.from_pretrained(hf_repo_name, token=api_key, + trust_remote_code=trust_remote_code) - logging.info("update - loading HF Model - %s", model.config.to_dict()) + self.tokenizer = AutoTokenizer.from_pretrained(hf_repo_name, token=api_key, + trust_remote_code=trust_remote_code) + else: + if torch.cuda.is_available(): + self.model = AutoModel.from_pretrained(hf_repo_name, trust_remote_code=trust_remote_code, + torch_dtype="auto") + else: + self.model = AutoModel.from_pretrained(hf_repo_name, trust_remote_code=trust_remote_code) + + self.tokenizer = AutoTokenizer.from_pretrained(hf_repo_name, trust_remote_code=trust_remote_code) + + self.use_gpu = torch.cuda.is_available() and use_gpu_if_available if self.model: - self.config = model.config.to_dict() + self.config = self.model.config.to_dict() if "hidden_size" in self.config: self.embedding_dims = self.config["hidden_size"] - logging.info("warning: embedding_dims - from config - %s ", self.embedding_dims) if "model_type" in self.config: self.model_type = self.config["model_type"] if "max_position_embeddings" in self.config: - self.max_total_len = self.config["max_position_embeddings"] - if "_name_or_path" in self.config: + try: + self.context_window = int(self.config["max_position_embeddings"]) + except: + pass + + if "_name_or_path" in self.config: self.model_name = self.config["_name_or_path"] - logging.info("update: model_name - from config - %s ", self.model_name) if "architectures" in self.config: if isinstance(self.config["architectures"],list): @@ -3093,23 +3604,34 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo else: self.model_architectures = self.config["architectures"] + self.model.eval() + + if self.use_gpu: + self.model.to('cuda') + else: raise ModelNotFoundException(model_name) # no api key expected or required self.api_key = api_key + # set max len for tokenizer truncation with 'safe_buffer' below context_window size + if self.context_window > 
self.safe_buffer: + self.max_len = self.context_window - self.safe_buffer + else: + self.max_len = self.context_window + + # option to set smaller size than model context window + if max_len: + if max_len < self.context_window: + self.max_len = max_len + def token_counter(self, text_sample): # need to support HF tokenizer toks = self.tokenizer.encode(text_sample).ids return len(toks) - # this is here for temporary reference - will be removed - def stransformer_embedding(self, sentence): - embedding = self.model.encode(sentence, convert_to_tensor=True) - embedding_2d = embedding.unsqueeze(0) - return embedding_2d - + @torch.no_grad() def embedding (self, text_sample, api_key=None): # return embeddings only @@ -3119,25 +3641,26 @@ def embedding (self, text_sample, api_key=None): else: sequence = [text_sample] - logging.info("update: HFEmbedding.embedding() - %s ", len(text_sample)) + model_inputs = self.tokenizer(sequence, truncation=True, max_length=self.max_len, return_tensors="pt",padding=True) - # shorter than 512 - model_inputs = self.tokenizer(sequence, truncation=True, max_length=500, return_tensors="pt",padding=True) - - model_outputs = self.model(model_inputs.input_ids, - attention_mask=model_inputs.attention_mask, output_hidden_states=True) - - # the [cls] aggregated embedding is in the last hidden state - # dims of [1, 768] + if self.use_gpu: + input_ids = model_inputs.input_ids.to('cuda') + attn_mask = model_inputs.attention_mask.to('cuda') + else: + input_ids = model_inputs.input_ids.to('cpu') + attn_mask = model_inputs.attention_mask.to('cpu') - embedding = model_outputs.hidden_states[-1][:,0] + model_outputs = self.model(input_ids, attention_mask=attn_mask) - # embedding = embedding.detach().numpy() - logging.info("update: hf embeddings output shape - %s ", embedding.shape) + embedding = model_outputs.last_hidden_state[:,0] # normalize hf embeddings embeddings_normalized = torch.nn.functional.normalize(embedding, p=2, dim=1) - embeddings_normalized = embeddings_normalized.detach().numpy() + + if self.use_gpu: + embeddings_normalized = np.array(embeddings_normalized.detach().to('cpu')) + else: + embeddings_normalized = embeddings_normalized.detach().numpy() return embeddings_normalized @@ -3157,13 +3680,35 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo # pull in expected hf input self.model_name = model_name + self.hf_tokenizer_name = model_name self.model = model - self.tokenizer= tokenizer - + self.tokenizer = tokenizer + + # *** NEW INSERT - Function Calls *** + self.model_card = model_card + self.logits_record = [] + self.get_logits = False + self.output_tokens = [] + self.top_logit_count = 10 + self.primary_keys = None + self.function = None + self.fc_supported = False + + if model_card: + + if "primary_keys" in model_card: + self.primary_keys = model_card["primary_keys"] + + if "function" in model_card: + self.function = model_card["function"] + + if "function_call" in model_card: + self.fc_supported = model_card["function_call"] + # note - these two parameters will control how prompts are handled - model-specific self.prompt_wrapper = prompt_wrapper self.instruction_following = instruction_following - + # instantiate if model_name passed without actual model and tokenizer if model_name and not model and not tokenizer: @@ -3173,27 +3718,41 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo except: raise DependencyNotInstalledException("transformers") + hf_repo_name = self.model_name + + if not 
self.model_card: + self.model_card = ModelCatalog().lookup_model_card(self.model_name) + + if self.model_card: + if "hf_repo" in self.model_card: + hf_repo_name = self.model_card["hf_repo"] + if api_key: if torch.cuda.is_available(): - self.model = AutoModelForCausalLM.from_pretrained(model_name,token=api_key, trust_remote_code=trust_remote_code, torch_dtype="auto") + self.model = AutoModelForCausalLM.from_pretrained(hf_repo_name, token=api_key, + trust_remote_code=trust_remote_code, + torch_dtype="auto") else: - self.model = AutoModelForCausalLM.from_pretrained(model_name,token=api_key, trust_remote_code=trust_remote_code) - - self.tokenizer = AutoTokenizer.from_pretrained(model_name,token=api_key, trust_remote_code=trust_remote_code) + self.model = AutoModelForCausalLM.from_pretrained(hf_repo_name, token=api_key, + trust_remote_code=trust_remote_code) + + self.tokenizer = AutoTokenizer.from_pretrained(hf_repo_name, token=api_key, + trust_remote_code=trust_remote_code) else: if torch.cuda.is_available(): - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=trust_remote_code, torch_dtype="auto") + self.model = AutoModelForCausalLM.from_pretrained(hf_repo_name, trust_remote_code=trust_remote_code, + torch_dtype="auto") else: - self.model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=trust_remote_code) - self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=trust_remote_code) - + self.model = AutoModelForCausalLM.from_pretrained(hf_repo_name, trust_remote_code=trust_remote_code) + self.tokenizer = AutoTokenizer.from_pretrained(hf_repo_name, trust_remote_code=trust_remote_code) + # set to defaults for HF models in Model Catalog # this can be over-ridden post initiation if needed for custom models self.prompt_wrapper = "human_bot" self.instruction_following = False - + self.trailing_space = "" - + self.model_type = None self.config = None self.max_total_len = context_window @@ -3215,10 +3774,10 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo self.config = self.model.config else: self.config = self.model.config.to_dict() - + if "trailing_space" in self.config: self.trailing_space = self.config["trailing_space"] - + if "eos_token_id" in self.config: # only use to set if value is not None if self.config["eos_token_id"]: @@ -3234,7 +3793,7 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo self.max_total_len = self.config["max_position_embeddings"] if "architectures" in self.config: - if isinstance(self.config["architectures"],list): + if isinstance(self.config["architectures"], list): self.model_architectures = self.config["architectures"][0] else: self.model_architectures = self.config["architectures"] @@ -3260,15 +3819,18 @@ def __init__(self, model=None, tokenizer=None, model_name=None, api_key=None, mo self.add_prompt_engineering = False self.add_context = "" - def set_api_key (self, api_key, env_var="USER_MANAGED_HF_API_KEY"): + def set_api_key(self, api_key, env_var="USER_MANAGED_HF_API_KEY"): + + """ Sets the API key - generally not needed for public HF repositories. """ - # set api_key os.environ[env_var] = api_key logging.info("update: added and stored HF api_key in environmental variable- %s", env_var) return self - def _get_api_key (self, env_var="USER_MANAGED_HF_API_KEY"): + def _get_api_key(self, env_var="USER_MANAGED_HF_API_KEY"): + + """ Gets API key from os.environ variable. 
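Because the catalog lookups in this update also match on display_name, either identifier resolves to the same model card - a small illustrative check:

```python
from llmware.models import ModelCatalog

# "slim-sentiment-1b" is the display_name alias for "llmware/slim-sentiment"
card_by_name = ModelCatalog().lookup_model_card("llmware/slim-sentiment")
card_by_alias = ModelCatalog().lookup_model_card("slim-sentiment-1b")

assert card_by_name["model_name"] == card_by_alias["model_name"]
```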
""" self.api_key = os.environ.get(env_var) @@ -3278,11 +3840,18 @@ def _get_api_key (self, env_var="USER_MANAGED_HF_API_KEY"): return self.api_key def token_counter(self, text_sample): + + """ Quick approximate token counter - uses default tokenizer so may have minor differences from the + model's actual tokenization. """ + tokenizer = Utilities().get_default_tokenizer() toks = tokenizer.encode(text_sample).ids + return len(toks) - def prompt_engineer (self, query, context, inference_dict): + def prompt_engineer(self, query, context, inference_dict): + + """ Applies prompt and templating preparation. """ # if loaded model was not pretrained on instruction_following, then skip any instructions if not self.instruction_following: @@ -3334,10 +3903,13 @@ def prompt_engineer (self, query, context, inference_dict): return prompt_engineered + @torch.no_grad() def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None, inference_dict=None): + """ Executes generation inference on model. """ + # first prepare the prompt if add_context: @@ -3346,6 +3918,22 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k if add_prompt_engineering: self.add_prompt_engineering = add_prompt_engineering + # add defaults if add_prompt_engineering not set + if not self.add_prompt_engineering: + + if self.add_context: + self.add_prompt_engineering = "default_with_context" + else: + self.add_prompt_engineering = "default_no_context" + + # end - defaults update + + # show warning if function calling model + if self.fc_supported: + logging.warning("warning: this is a function calling model - using .inference may lead to unexpected " + "results. Recommended to use the .function_call method to ensure correct prompt " + "template packaging.") + if inference_dict: if "temperature" in inference_dict: @@ -3359,17 +3947,17 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k if self.add_prompt_engineering: prompt_enriched = self.prompt_engineer(prompt, self.add_context, inference_dict=inference_dict) prompt_final = prompt_enriched - + # text_prompt = prompt_final + "\n" - + # most models perform better with no trailing space or line-break at the end of prompt # -- in most cases, the trailing space will be "" # -- yi model prefers a trailing "\n" # -- keep as parameterized option to maximize generation performance # -- can be passed either thru model_card or model config from HF - + text_prompt = prompt_final + self.trailing_space - + # second - tokenize to get the input_ids tokenizer_output = self.tokenizer.encode(text_prompt) @@ -3405,7 +3993,7 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k # default settings pad_token_id = 0 - + # for most models, eos_token_id = 0, but llama and mistral = 2 eos_token_id = [self.eos_token_id] # eos_token_id = [0] @@ -3452,12 +4040,17 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k # need to invoke forward pass on model # outputs = self.model(inp0,inp1,pkv) - outputs = self.model(input_ids=inp0,attention_mask=inp1, past_key_values=pkv, + outputs = self.model(input_ids=inp0, attention_mask=inp1, past_key_values=pkv, return_dict=True) new_tokens_generated += 1 - next_token_logits = outputs.logits[:,-1,:] + next_token_logits = outputs.logits[:, -1, :] + + # capture top logits - not currently activated for inference + # self.register_top_logits(next_token_logits) + # shape of next_token_logits = torch.Size([1, 32000]) + # print("next token 
logits shape - ", next_token_logits.shape) if self.temperature: next_token_scores = next_token_logits / self.temperature @@ -3519,12 +4112,12 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k # print("update: output only - ", output_only) output_str = self.tokenizer.decode(output_only) - + # post-processing clean-up - stop at endoftext eot = output_str.find("<|endoftext|>") if eot > -1: output_str = output_str[:eot] - + # new post-processing clean-up - stop at eots = output_str.find("") if eots > -1: @@ -3533,15 +4126,15 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k # post-processing clean-up - start after bot wrapper bot = output_str.find(":") if bot > -1: - output_str = output_str[bot+len(":"):] - + output_str = output_str[bot + len(":"):] + # new post-processing cleanup - skip repeating starting boss = output_str.find("") if boss > -1: - output_str = output_str[boss+len(""):] - + output_str = output_str[boss + len(""):] + # end - post-processing - + total_len = len(outputs_np) usage = {"input": input_token_len, @@ -3554,6 +4147,304 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_k return output_response + def fc_prompt_engineer(self, context, params=None, function=None): + + """ Prompt engineering for Function Call prompts. """ + + if not params: + params = self.primary_keys + + if not function: + function = self.function[0] + + # prepare SLIM prompt + class_str = "" + for key in params: + class_str += str(key) + ", " + if class_str.endswith(", "): + class_str = class_str[:-2] + + f = str(function) + + # key templating format for SLIM function calls + full_prompt = ": " + context + "\n" + "<{}> {} ".format(f, class_str, f) + "\n:" + + full_prompt = full_prompt + self.trailing_space + + return full_prompt + + def register_top_logits(self, next_token_logit): + + """ Retrieves the logits for current sample, and packages into indexed top list and + registers in self.logit_record. """ + + # assumes input of next_token_logit from generation script + # will be a tensor of shape [1,vocab_size] + + logit_size = next_token_logit.shape[-1] + logit = torch.squeeze(next_token_logit) + + if self.use_gpu: + logit_array = np.array(logit.to('cpu')) + else: + logit_array = np.array(logit) + + sm = np.exp(logit_array) / sum(np.exp(logit_array)) + + sm_sorted = np.sort(sm) + sm_args_sorted = np.argsort(sm) + + top_logits = [] + # by default, self.top_logit_count = 10, will get the top 10 highest values in logit output + for x in range(0, self.top_logit_count): + pair = (sm_args_sorted[logit_size - x - 1], sm_sorted[logit_size - x - 1]) + top_logits.append(pair) + + self.logits_record.append(top_logits) + + return top_logits + + @torch.no_grad() + def function_call(self, context, function=None, params=None, get_logits=True): + + """ This is the key inference method for SLIM models - takes a context passage and a key list + which is packaged in the prompt as the keys for the dictionary output""" + + if not self.fc_supported: + logging.warning("warning: HFGenerativeModel - loaded model does not support function calls. 
" + "Please either use the standard .inference method with this model, or use a " + "model that has 'function_calls' key set to True in its model card.") + return [] + + # reset and start from scratch with new function call + self.output_tokens = [] + self.logits_record = [] + + if get_logits: + self.get_logits = get_logits + + if params: + self.primary_keys = params + + if not self.primary_keys: + logging.warning("warning: function call - no keys provided - function call may yield unpredictable results") + + prompt = self.fc_prompt_engineer(context, params=self.primary_keys, function=function) + + # second - tokenize to get the input_ids + + tokenizer_output = self.tokenizer.encode(prompt) + input_token_len = len(tokenizer_output) + input_ids = torch.tensor(tokenizer_output).unsqueeze(0) + + # explicit check and setting to facilitate debugging + if self.use_gpu: + input_ids = input_ids.to('cuda') + else: + input_ids = input_ids.to('cpu') + + # time start + time_start = time.time() + + # Note: this is a simplified 'sampling' generation loop, derived from the far more + # sophisticated Generation capabilities provided by the Transformers library + # It is included here to enable transformers users to easily extend llmware to include + # their favorite generative models in the transformers library. + + # The code below contains code copied from, derived from or inspired from the Huggingface + # transformers generation code. + # (https: // github.com / huggingface / transformers / src / transformers / generation) + + # Copyright 2018 The Google AI Language Team Authors and The HuggingFace Inc.team. + # Copyright(c) 2018, NVIDIA CORPORATION.All rights reserved. + # Licensed under the Apache License, Version 2.0(the "License"); you may not use this + # file except in compliance with the License. You may obtain a copy of the License at + # http: // www.apache.org / licenses / LICENSE - 2.0 Unless required by applicable law or agreed + # to in writing, software distributed under the License is distributed on an "AS IS" BASIS, + # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License + # for the specific language governing permissions and limitations under the License. 
+ + # default settings + pad_token_id = 0 + + # for most models, eos_token_id = 0, but llama and mistral = 2 + eos_token_id = [self.eos_token_id] + # eos_token_id = [0] + + eos_token_id_tensor = torch.tensor(eos_token_id).to(input_ids.device) + + # keep track of which sequences are already finished + unfinished_sequences = torch.ones(input_ids.shape[0], dtype=torch.long, device=input_ids.device) + + this_peer_finished = False # used by synced_gpus only + # auto-regressive generation + new_tokens_generated = 0 + + attn_mask = torch.ones(input_ids.shape[1]).unsqueeze(0) + + # explicit check and setting to facilitate debugging, if needed + if self.use_gpu: + attn_mask = attn_mask.to('cuda') + else: + attn_mask = attn_mask.to('cpu') + + batch_size = input_ids.shape[0] + seq_len = input_ids.shape[1] + + pkv = None + + while True: + + inp_one_time: torch.LongTensor = input_ids + + if new_tokens_generated > 0: + inp_one_time = input_ids[:, -1:] + + # explicit check and setting to facilitate debugging, if needed + if self.use_gpu: + inp0 = inp_one_time.to('cuda') + inp1 = attn_mask.to('cuda') + else: + inp0 = inp_one_time.to('cpu') + inp1 = attn_mask.to('cpu') + + # inp3 = torch.LongTensor([new_tokens_generated]) + + # need to invoke forward pass on model + # outputs = self.model(inp0,inp1,pkv) + + outputs = self.model(input_ids=inp0, attention_mask=inp1, past_key_values=pkv, + return_dict=True) + + new_tokens_generated += 1 + + next_token_logits = outputs.logits[:, -1, :] + + # option to capture logits for analysis + # if self.get_logits: self.register_top_logits(next_token_logits) + + if self.temperature: + next_token_scores = next_token_logits / self.temperature + else: + next_token_scores = next_token_logits + + # sample + probs = nn.functional.softmax(next_token_scores, dim=-1) + next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1) + + # option to capture logits and output tokens for analysis + if self.get_logits: + self.register_top_logits(next_token_logits) + + # capture the output tokens + if self.use_gpu: + next_tokens_np = np.array(next_tokens.to('cpu')) + else: + next_tokens_np = np.array(next_tokens) + + self.output_tokens.append(next_tokens_np[0]) + + # finished sentences should have their next token be a padding token + if eos_token_id is not None: + next_tokens = next_tokens * unfinished_sequences + pad_token_id * (1 - unfinished_sequences) + + # update generated ids, model inputs, and length for next step + input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1) + + # testing output in progress starts here + """ + print("update: input_ids -", input_ids) + # outputs_detached = outputs.to('cpu') + outputs_np = np.array(input_ids[0]) + output_str = self.tokenizer.decode(outputs_np) + print("update: output string - ", output_str) + """ + # end - testing output in progress + + pkv = outputs.past_key_values + + # update attention mask + attn_mask = torch.cat([attn_mask, attn_mask.new_ones((attn_mask.shape[0], 1))], dim=-1) + + # if eos_token was found in one sentence, set sentence to finished + if eos_token_id_tensor is not None: + unfinished_sequences = unfinished_sequences.mul( + next_tokens.tile(eos_token_id_tensor.shape[0], 1).ne(eos_token_id_tensor.unsqueeze(1)).prod( + dim=0) + ) + + # stop when each sentence is finished + if unfinished_sequences.max() == 0: + this_peer_finished = True + + # stop if we exceed the maximum length + if new_tokens_generated > self.target_requested_output_tokens: + this_peer_finished = True + + if this_peer_finished: + break + + # 
Generation completed - prepare the output + + if self.use_gpu: + outputs_np = np.array(input_ids[0].to('cpu')) + else: + outputs_np = np.array(input_ids[0]) + + output_only = outputs_np[input_token_len:] + + # print("update: output only - ", output_only) + + output_str = self.tokenizer.decode(output_only) + + # post-processing clean-up - stop at endoftext + eot = output_str.find("<|endoftext|>") + if eot > -1: + output_str = output_str[:eot] + + # new post-processing clean-up - stop at + eots = output_str.find("") + if eots > -1: + output_str = output_str[:eots] + + # post-processing clean-up - start after bot wrapper + bot = output_str.find(":") + if bot > -1: + output_str = output_str[bot + len(":"):] + + # new post-processing cleanup - skip repeating starting + boss = output_str.find("") + if boss > -1: + output_str = output_str[boss + len(""):] + + # end - post-processing + + total_len = len(outputs_np) + + usage = {"input": input_token_len, + "output": total_len - input_token_len, + "total": total_len, + "metric": "tokens", + "processing_time": time.time() - time_start} + + try: + output_dict = ast.literal_eval(output_str) + usage.update({"type": "dict"}) + + except: + logging.warning("warning: automatic function call conversion to " + "python dictionary failed - %s", output_str) + output_dict = output_str + usage.update({"type": "string"}) + + output_response = {"llm_response": output_dict, "usage": usage} + + if get_logits: + output_response.update({"logits": self.logits_record}) + output_response.update({"output_tokens": self.output_tokens}) + + return output_response + class ConfigStruct(Structure): @@ -3574,7 +4465,7 @@ class GGUFGenerativeModel: # This implementation of GGUFGenerativeModel includes code derived, inspired, and modified from ctransformers # For more information on ctransformers: please see https://github.com/marella/ctransformers # - # ctransformers is a a Python and CPP wrapper on llama.cpp + # ctransformers is a Python and CPP wrapper on llama.cpp # note: we have attempted to conform with the ctransformers interface specification, for easy portability to # integrate with llmware - over time, this interface specification may evolve # @@ -3601,6 +4492,38 @@ def __init__(self, model_name=None, model_card=None, api_key=None, prompt_wrappe self.add_context = "" self.temperature = 0.3 self.model_type = "gguf" + self.model_card = model_card + + self.gguf_file = None + self.gguf_repo = None + + # *** NEW INSERT - Function Calls *** + + self.logits_record = [] + self.get_logits = False + self.output_tokens = [] + self.top_logit_count = 10 + + self.primary_keys = None + self.function = None + self.hf_tokenizer_name = None + self.fc_supported = False + + if model_card: + + if "primary_keys" in model_card: + self.primary_keys = model_card["primary_keys"] + + if "function" in model_card: + self.function = model_card["function"] + + if "tokenizer" in model_card: + self.hf_tokenizer_name = model_card["tokenizer"] + + if "function_call" in model_card: + self.fc_supported = model_card["function_call"] + + # *** END - INSERT *** if model_card: @@ -3619,9 +4542,17 @@ def __init__(self, model_name=None, model_card=None, api_key=None, prompt_wrappe if "prompt_wrapper" in model_card: self.prompt_wrapper = model_card["prompt_wrapper"] + if "gguf_file" in model_card: + self.gguf_file = model_card["gguf_file"] # e.g., "ggml-model-q4_k_m.gguf" + + if "gguf_repo" in model_card: + self.gguf_repo = model_card["gguf_repo"] # e.g., "llmware/dragon-mistral-7b-v0-gguf" + + if 
"instruction_following" in model_card: + self.instruction_following = model_card["instruction_following"] + # gguf specific attributes - self.gguf_file = None - self.gguf_repo = None + self.config = config self._model_path = None self._config = config @@ -3644,13 +4575,13 @@ def __init__(self, model_name=None, model_card=None, api_key=None, prompt_wrappe self.top_k = 40 self.top_p = 0.95 self.temperature = 0.3 - self.repetition_penalty= 1.1 - self.last_n_tokens= 64 + self.repetition_penalty = 1.1 + self.last_n_tokens = 64 self.seed = -1 # eval - self.batch_size= 8 - self.threads= -1 + self.batch_size = 8 + self.threads = -1 # generate self.max_new_tokens = 256 @@ -3662,6 +4593,7 @@ def __init__(self, model_name=None, model_card=None, api_key=None, prompt_wrappe self.context_length = 2048 self.gpu_layers = 50 self.mmap = True + self.mlock = False self.model_path = None @@ -3672,10 +4604,12 @@ def __init__(self, model_name=None, model_card=None, api_key=None, prompt_wrappe def load_model_for_inference(self, file_loading_path, model_card=None): + """ Loads GGUF model for inference. """ + if model_card: self.model_name = model_card["model_name"].split("/")[-1] - self.gguf_file = model_card["gguf_file"] # e.g., "ggml-model-q4_k_m.gguf", - self.gguf_repo = model_card["gguf_repo"] # e.g., "llmware/dragon-mistral-7b-v0-gguf" + self.gguf_file = model_card["gguf_file"] # e.g., "ggml-model-q4_k_m.gguf", + self.gguf_repo = model_card["gguf_repo"] # e.g., "llmware/dragon-mistral-7b-v0-gguf" model_file = os.path.join(file_loading_path, self.gguf_file) @@ -3704,6 +4638,8 @@ def load_model_for_inference(self, file_loading_path, model_card=None): def __getattr__(self, name): + """ Maps class methods to ctransformers ctypes dynamic lib methods. """ + # note: this implementation of the CTYPES / CPP interface is intended to be conforming with: # -- https://github.com/marella/ctransformers/blob/main/models/llm.cc @@ -3711,7 +4647,9 @@ def __getattr__(self, name): return partial(getattr(self._lib, name), self._llm) raise AttributeError(f"'LLM' object has no attribute '{name}'") - def tokenize(self, text, add_bos_token = None): + def tokenize(self, text, add_bos_token=None): + + """ Tokenizes text. """ # note: this implementation of the CTYPES / CPP interface is intended to be conforming with: # -- https://github.com/marella/ctransformers/blob/main/models/llm.cc @@ -3725,6 +4663,8 @@ def tokenize(self, text, add_bos_token = None): def detokenize(self, tokens, decode): + """ Encodes text. """ + # note: this implementation of the CTYPES / CPP interface is intended to be conforming with: # -- https://github.com/marella/ctransformers/blob/main/models/llm.cc @@ -3787,11 +4727,17 @@ def sample(self): n_last = len(last_tokens) last_tokens = (c_int * n_last)(*last_tokens) + # new option to save logits + if self.get_logits: + self.register_top_logits() + return self.ctransformers_llm_sample(last_tokens, n_last, top_k, top_p, temperature, repetition_penalty, seed) def prepare_inputs_for_generation(self, tokens): + """ Prepares inputs for generation as part of inference sampling. """ + if not self.reset: return tokens @@ -3808,6 +4754,8 @@ def prepare_inputs_for_generation(self, tokens): def generate(self, tokens): + """ Generation loop. 
""" + # note: this implementation of the CTYPES / CPP interface is intended to be conforming with: # -- https://github.com/marella/ctransformers/blob/main/models/llm.cc @@ -3818,6 +4766,10 @@ def generate(self, tokens): token = self.sample() + if self.get_logits: + # print("max arg token - ", token) + self.output_tokens.append(token) + self.eval([token]) if self.ctransformers_llm_is_eos_token(token): @@ -3827,6 +4779,8 @@ def generate(self, tokens): def _stream(self, prompt): + """ Sampling method used in inference generation. """ + # note: this implementation of the CTYPES / CPP interface is intended to be conforming with: # -- https://github.com/marella/ctransformers/blob/main/models/llm.cc @@ -3894,6 +4848,8 @@ def _stream(self, prompt): def find_library(self): + """ Identifies correct library by platform. """ + # current implementation support in core library - will expand/evaluate over time lib_path = os.path.join(LLMWareConfig.get_config("shared_lib_path"), "gguf") @@ -3939,6 +4895,8 @@ def find_library(self): def load_library(self): + """ Loads dynamic library to enable GGUF llama_cpp inferences. """ + c_int_p = POINTER(c_int) c_float_p = POINTER(c_float) llm_p = c_void_p @@ -3967,6 +4925,9 @@ def load_library(self): lib.ctransformers_llm_create.argtypes = [c_char_p, c_char_p, ConfigStruct] lib.ctransformers_llm_create.restype = llm_p + # new insert - assigning llm_p to c_void_p + llm_p = c_void_p + lib.ctransformers_llm_delete.argtypes = [llm_p] lib.ctransformers_llm_delete.restype = None @@ -4015,8 +4976,55 @@ def load_library(self): return lib + def unload_model(self): + + """ Unloads a model to release memory """ + + # print("starting to unload") + + if self._llm is not None: + self.ctransformers_llm_delete() + + # print("done - unloaded model") + + return 0 + + def register_top_logits(self): + + """ Retrieves the logits for current sample, and packages into indexed top list and + registers in self.logit_record. """ + + logit_pointer = self.ctransformers_llm_logits_data() + # sm = np.exp(logit_pointer) / sum(np.exp(logit_pointer)) + logit_size = self.ctransformers_llm_logits_size() + + logit_array = np.zeros(logit_size) + + for x in range(0, logit_size): + # print("logit selection: ", x, logit_pointer[x]) + logit_array[x] = logit_pointer[x] + + sm = np.exp(logit_array) / sum(np.exp(logit_array)) + + sm_sorted = np.sort(sm) + sm_args_sorted = np.argsort(sm) + + top_logits = [] + # by default, self.top_logit_count = 10 - so gets top 10 highest values in logit + + for x in range(0, self.top_logit_count): + # generally for llama-based models, logit_size = 32000 + pair = (sm_args_sorted[logit_size - x - 1], sm_sorted[logit_size - x - 1]) + top_logits.append(pair) + + self.logits_record.append(top_logits) + + return top_logits + def set_api_key(self, api_key, env_var="USER_MANAGED_HF_API_KEY"): + """ Sets API key - generally not used in GGUF models. """ + # set api_key os.environ[env_var] = api_key logging.info("update: added and stored HF api_key in environmental variable- %s", env_var) @@ -4025,6 +5033,8 @@ def set_api_key(self, api_key, env_var="USER_MANAGED_HF_API_KEY"): def _get_api_key(self, env_var="USER_MANAGED_HF_API_KEY"): + """ Gets API key - generally not used in GGUF models. """ + self.api_key = os.environ.get(env_var) if not self.api_key: @@ -4033,12 +5043,17 @@ def _get_api_key(self, env_var="USER_MANAGED_HF_API_KEY"): return self.api_key def token_counter(self, text_sample): + + """ Fast approximate token counter. 
""" + tokenizer = Utilities().get_default_tokenizer() toks = tokenizer.encode(text_sample).ids return len(toks) def prompt_engineer(self, query, context, inference_dict): + """ Prompt engineering, packaging and templating. """ + # if loaded model was not pretrained on instruction_following, then skip any instructions if not self.instruction_following: @@ -4089,7 +5104,10 @@ def prompt_engineer(self, query, context, inference_dict): return prompt_engineered - def inference(self, prompt, add_context=None, add_prompt_engineering=None,api_key=None,inference_dict=None): + def inference(self, prompt, add_context=None, add_prompt_engineering=None, api_key=None, inference_dict=None, + get_logits=False): + + """ Main method for inference generation. """ # first prepare the prompt @@ -4099,6 +5117,29 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None,api_ke if add_prompt_engineering: self.add_prompt_engineering = add_prompt_engineering + # update default handling for no add_prompt_engineering + """ + if not self.add_prompt_engineering: + if self.add_context: + self.add_prompt_engineering = "default_with_context" + else: + self.add_prompt_engineering = "default_no_context" + """ + # end - update + + # show warning if function calling model + if self.fc_supported: + logging.warning("warning: this is a function calling model - using .inference may lead to unexpected " + "results. Recommended to use the .function_call method to ensure correct prompt " + "template packaging.") + + # start with clean logits_record and output_tokens for each function call + self.logits_record = [] + self.output_tokens = [] + + if get_logits: + self.get_logits = get_logits + if inference_dict: if "temperature" in inference_dict: @@ -4123,6 +5164,8 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None,api_ke text_prompt = prompt_final + self.trailing_space + # print("update: GGUFGenerative - inference - text_prompt - ", text_prompt) + time_start = time.time() text = self._stream(text_prompt) @@ -4161,6 +5204,127 @@ def inference(self, prompt, add_context=None, add_prompt_engineering=None,api_ke output_response = {"llm_response": output_str, "usage": usage} + # experimental - add get_logits in inference + + if get_logits: + output_response.update({"logits": self.logits_record}) + output_response.update({"output_tokens": self.output_tokens}) + + # end - experimental + + return output_response + + @torch.no_grad() + def function_call(self, context, function=None, params=None, get_logits=True): + + """ This is the key inference method for SLIM models - takes a context passage and a key list + which is packaged in the prompt as the keys for python dictionary output""" + + if not self.fc_supported: + logging.warning("warning: GGUFGenerativeModel - loaded model does not support function calls. 
" + "Please either use the standard .inference method with this model, or use a GGUF " + "model that has 'function_calls' key set to True in its model card.") + return [] + + # start with clean logits_record and output_tokens for each function call + self.logits_record = [] + self.output_tokens = [] + + if get_logits: + self.get_logits = get_logits + + if params: + self.primary_keys = params + + if not self.primary_keys: + print("warning: GGUF - function call - no keys provided - function call may yield unpredictable results") + + if not params: + params = self.primary_keys + + if not function: + function = self.function[0] + + # prepare SLIM prompt + class_str = "" + for key in params: + class_str += str(key) + ", " + if class_str.endswith(", "): + class_str = class_str[:-2] + + f = str(function) + + full_prompt = ": " + context + "\n" + "<{}> {} ".format(f, class_str, f) + "\n:" + full_prompt = full_prompt + self.trailing_space + + text_prompt = full_prompt + + # call inference here + time_start = time.time() + + text = self._stream(text_prompt) + output_str = "".join(text) + + # post-processing clean-up - stop at endoftext + eot = output_str.find("<|endoftext|>") + if eot > -1: + output_str = output_str[:eot] + + # new post-processing clean-up - stop at + eots = output_str.find("") + if eots > -1: + output_str = output_str[:eots] + + # post-processing clean-up - start after bot wrapper + bot = output_str.find(":") + if bot > -1: + output_str = output_str[bot + len(":"):] + + # new post-processing cleanup - skip repeating starting + boss = output_str.find("") + if boss > -1: + output_str = output_str[boss + len(""):] + + # end - post-processing + + input_toks = self.token_counter(text_prompt) + output_toks = self.token_counter(output_str) + + usage = {"input": input_toks, + "output": output_toks, + "total": input_toks + output_toks, + "metric": "tokens", + "processing_time": time.time() - time_start} + + + try: + output_dict = ast.literal_eval(output_str) + usage.update({"type": "dict"}) + convert_to_dict = True + except: + logging.warning("warning: automatic conversion of function call output to " + "python dictionary failed -%s.", output_str) + output_dict = output_str + usage.update({"type": "string"}) + convert_to_dict = False + + # quick remediation attempt - usually source of error is cut-off at end + if not convert_to_dict: + triage_terminus = "]'}" + output_str_triage = output_str + triage_terminus + try: + output_dict = ast.literal_eval(output_str_triage) + usage.update({"type": "dict"}) + except: + logging.warning("update: first remediation attempt did not fix dictionary - %s", output_str_triage) + # end - remediation attempt + + output_response = {"llm_response": output_dict, "usage": usage} + + if get_logits: + output_response.update({"logits": self.logits_record}) + output_response.update({"output_tokens": self.output_tokens}) + return output_response @@ -4169,7 +5333,7 @@ class LLMWareSemanticModel: """ LLMWareSemanticModel class implements the LLMWareSemanticModel API, which is based on the SentenceTransformer architecture. 
""" - def __init__(self, model_name=None, model=None, embedding_dims=None, max_seq_length=150, + def __init__(self, model_name=None, model=None, embedding_dims=None, max_len=150, model_card=None, api_key=None): self.model_name = model_name @@ -4177,7 +5341,7 @@ def __init__(self, model_name=None, model=None, embedding_dims=None, max_seq_len self.max_input_len = 512 self.max_output_len = 512 - self.max_seq_length = max_seq_length + self.max_len = max_len # to be applied to 'passed-in' Sentence Transformers model self.normalize_embeddings = True @@ -4257,7 +5421,7 @@ def load_model_for_inference(self,fp=None, model_card=None): self.model_repo_location = fp self.model = STransformer(self.model_repo_location, model_size=self.model_size, - max_seq_length=self.max_seq_length) + max_seq_length=self.max_len) return self diff --git a/llmware/parsers.py b/llmware/parsers.py index 2aa69b53..d48e832e 100644 --- a/llmware/parsers.py +++ b/llmware/parsers.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The parsers module implements all parsers, i.e. all conversions fom a modality to bloacks in a database. + +The module currently implements parsers for websites, images, voices, texts, wikis, and dialogs. +""" import time @@ -39,7 +42,7 @@ from llmware.resources import CollectionRetrieval, CollectionWriter, ParserState from llmware.exceptions import DependencyNotInstalledException, FilePathDoesNotExistException, \ - OCRDependenciesNotFoundException, ModuleNotFoundException + OCRDependenciesNotFoundException, LLMWareException class Parser: @@ -266,47 +269,53 @@ def _collator(self, input_folder_path, dupe_check=False): files_to_be_processed = [] duplicate_files = [] - for filename in input_file_names: - filetype = filename.split(".")[-1] + if dupe_check: + # we get a reduced list of input_file_names if in existing_files is files we try to process + duplicate_files_tmp = list(set(input_file_names) - set(existing_files)) + # the duplicates are those that where not in duplicate_files_tmp so we take out the tmp from the input_file_names + # what's left is the duplicates + duplicate_files = list(set(input_file_names) - set(duplicate_files_tmp)) + # the counter is the length of the array + dup_counter = len(duplicate_files) + # We are done with this and we don't need to n times loop as before + # we set the imput_file_names to be the reduced list to not to process dupe files + input_file_names = duplicate_files_tmp - go_ahead = True - if dupe_check: - if filename in existing_files: - go_ahead= False - dup_counter += 1 - duplicate_files.append(filename) - if go_ahead: - files_to_be_processed.append(filename) + for filename in input_file_names: + + filetype = filename.split(".")[-1] + + files_to_be_processed.append(filename) - # copy file into specific channel for targeted parser + # copy file into specific channel for targeted parser - if filetype.lower() in self.office_types: - shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.office_work_folder,filename)) - office_found += 1 + if filetype.lower() in self.office_types: + shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.office_work_folder,filename)) + office_found += 1 - if filetype.lower() in self.pdf_types: - 
shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.pdf_work_folder, filename)) - pdf_found += 1 + if filetype.lower() in self.pdf_types: + shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.pdf_work_folder, filename)) + pdf_found += 1 - if filetype.lower() in self.text_types: - shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.text_work_folder,filename)) - text_found += 1 + if filetype.lower() in self.text_types: + shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.text_work_folder,filename)) + text_found += 1 - if filetype.lower() in self.ocr_types: - shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.ocr_work_folder,filename)) - ocr_found += 1 + if filetype.lower() in self.ocr_types: + shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.ocr_work_folder,filename)) + ocr_found += 1 - if filetype.lower() in self.voice_types: - shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.voice_work_folder,filename)) - voice_found += 1 + if filetype.lower() in self.voice_types: + shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.voice_work_folder,filename)) + voice_found += 1 - if filetype.lower() in self.zip_types: - shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.zip_work_folder,filename)) - zip_found += 1 + if filetype.lower() in self.zip_types: + shutil.copy(os.path.join(input_folder_path,filename), os.path.join(self.zip_work_folder,filename)) + zip_found += 1 logging.info("update: Duplicate files (skipped): %s ", dup_counter) logging.info("update: Total uploaded: %s ", len(input_file_names)) @@ -564,7 +573,7 @@ def convert_parsing_txt_file_to_json(self, file_path=None, fn="pdf_internal_test # test script for parsing txt file try: - output_file = open(os.path.join(file_path, fn), "r", encoding="utf-8").read() + output_file = open(os.path.join(file_path, fn), "r", encoding="utf-8-sig",errors="ignore").read() except Exception as e: print (e) @@ -1356,7 +1365,7 @@ def parse_office(self, input_fp, write_to_db=True, save_history=True): return output - def parse_text(self, input_fp, write_to_db=True, save_history=True): + def parse_text(self, input_fp, write_to_db=True, save_history=True, dupe_check=False,copy_to_library=False): """ Main entry point to parser for .txt, .csv, .json and .md files """ @@ -1388,64 +1397,76 @@ def parse_text(self, input_fp, write_to_db=True, save_history=True): for file in os.listdir(input_fp): - text_output = [] - # increment and get new doc_id - if write_to_db_on == 1: - self.library.doc_ID = self.library.get_and_increment_doc_id() + # by default, will process all files with text file extensions + go_ahead = True - file_type = file.split(".")[-1] - - # sub-routing by type of text file to appropriate handler - - if file_type.lower() in ["txt", "md"]: - # will parse as text - text_output = TextParser(self).text_file_handler (input_fp, file) - content_type = "text" - file_type = "txt" - - if file_type.lower() in ["csv"]: - # will parse as table - interpret_as_table=True - text_output = TextParser(self).csv_file_handler(input_fp, file, interpret_as_table=True) - content_type = "text" - file_type = "csv" - if interpret_as_table: - content_type = "table" - - if file_type.lower() in ["json","jsonl"]: - # will parse each line item as separate entry - - interpret_as_table=False - keys = ["text"] - text_output = TextParser(self).jsonl_file_handler(input_fp,file, - key_list=keys, - 
interpret_as_table=interpret_as_table, - separator="\n") - content_type = "text" - file_type = "jsonl" - if interpret_as_table: - content_type = "table" - - # consolidate into single function - breaking down output rows + if dupe_check: - if write_to_db_on == 1: - new_output, new_blocks, new_pages = self._write_output_to_db(text_output, file, - content_type=content_type, - file_type=file_type) - else: - new_output, new_blocks, new_pages = self._write_output_to_dict(text_output,file, - content_type=content_type, - file_type=file_type) + # basic_library_duplicate_check returns TRUE if it finds the file + if self.basic_library_duplicate_check(file): + go_ahead = False + + if go_ahead: + + text_output = [] + # increment and get new doc_id + if write_to_db_on == 1: + self.library.doc_ID = self.library.get_and_increment_doc_id() + + file_type = file.split(".")[-1] + + # sub-routing by type of text file to appropriate handler + + if file_type.lower() in ["txt", "md"]: + # will parse as text + text_output = TextParser(self).text_file_handler (input_fp, file) + content_type = "text" + file_type = "txt" + + if file_type.lower() in ["csv"]: + # will parse as table + interpret_as_table=True + text_output = TextParser(self).csv_file_handler(input_fp, file, interpret_as_table=True) + content_type = "text" + file_type = "csv" + if interpret_as_table: + content_type = "table" + + if file_type.lower() in ["json","jsonl"]: + # will parse each line item as separate entry + + interpret_as_table=False + keys = ["text"] + text_output = TextParser(self).jsonl_file_handler(input_fp,file, + key_list=keys, + interpret_as_table=interpret_as_table, + separator="\n") + content_type = "text" + file_type = "jsonl" + if interpret_as_table: + content_type = "table" + + # consolidate into single function - breaking down output rows + + if write_to_db_on == 1: + new_output, new_blocks, new_pages = self._write_output_to_db(text_output, file, + content_type=content_type, + file_type=file_type) + else: + new_output, new_blocks, new_pages = self._write_output_to_dict(text_output,file, + content_type=content_type, + file_type=file_type) + # will pass output_blocks as return value output += new_output - docs_added += 1 - blocks_created += new_blocks - pages_added += new_pages + docs_added += 1 + blocks_created += new_blocks + pages_added += new_pages # update overall library counter at end of parsing - if len(text_output) > 0: + if len(output) > 0: if write_to_db_on == 1: dummy = self.library.set_incremental_docs_blocks_images(added_docs=docs_added,added_blocks=blocks_created, added_images=0, added_pages=pages_added) @@ -1453,9 +1474,13 @@ def parse_text(self, input_fp, write_to_db=True, save_history=True): if save_history and write_to_db_on == 0: ParserState().save_parser_output(self.parser_job_id, self.parser_output) + if copy_to_library: + self.uploads(input_fp) + return output - def parse_pdf_by_ocr_images(self, input_fp, write_to_db=True, save_history=True): + def parse_pdf_by_ocr_images(self, input_fp, write_to_db=True, save_history=True, + dupe_check=False,copy_to_library=False): """ Alternative PDF parser option for scanned 'image-based' PDFs where digital parsing is not an option. 
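+        Optional dupe_check skips files already present in the library, and copy_to_library copies the source files into the library upload folder after parsing.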
""" @@ -1489,28 +1514,41 @@ def parse_pdf_by_ocr_images(self, input_fp, write_to_db=True, save_history=True) for file in os.listdir(input_fp): - ext = file.split(".")[-1] - if ext == "pdf": - doc_fn = secure_filename(file) + # by default, will process all files with text file extensions + go_ahead = True - # get new doc_ID number - if write_to_db_on == 1: - self.library.doc_ID = self.library.get_and_increment_doc_id() + if dupe_check: - docs_added += 1 + # basic_library_duplicate_check returns TRUE if it finds the file + if self.basic_library_duplicate_check(file): + go_ahead = False - output_by_page = ImageParser(self).process_pdf_by_ocr(input_fp, file) + if go_ahead: - for j, blocks in enumerate(output_by_page): + ext = file.split(".")[-1] + if ext == "pdf": + doc_fn = secure_filename(file) + # get new doc_ID number if write_to_db_on == 1: - new_output, new_blocks, _ = self._write_output_to_db(blocks,doc_fn,page_num=(j+1)) - else: - new_output, new_blocks, _ = self._write_output_to_dict(blocks,doc_fn,page_num=(j+1)) + self.library.doc_ID = self.library.get_and_increment_doc_id() + + docs_added += 1 + + output_by_page = ImageParser(self).process_pdf_by_ocr(input_fp, file) + + for j, blocks in enumerate(output_by_page): - output += new_output - blocks_added += new_blocks - pages_added += 1 + if write_to_db_on == 1: + new_output, new_blocks, _ = self._write_output_to_db(blocks,doc_fn,page_num=(j+1)) + else: + new_output, new_blocks, _ = self._write_output_to_dict(blocks,doc_fn,page_num=(j+1)) + + output += new_output + blocks_added += new_blocks + pages_added += 1 + + print("update: writing doc - page - ", file, j, len(blocks)) # update overall library counter at end of parsing @@ -1521,6 +1559,9 @@ def parse_pdf_by_ocr_images(self, input_fp, write_to_db=True, save_history=True) if save_history and write_to_db_on == 0: ParserState().save_parser_output(self.parser_job_id, self.parser_output) + if copy_to_library: + self.uploads(input_fp) + return output def _write_output_to_db(self, output, file, content_type="text", file_type="text",page_num=1): @@ -1780,7 +1821,7 @@ def parse_wiki(self, topic_list, write_to_db=True, save_history=False, target_re return output - def parse_image(self, input_folder, write_to_db=True, save_history=True): + def parse_image(self, input_folder, write_to_db=True, save_history=True, dupe_check=False,copy_to_library=False): """ Main entry point for OCR based parsing of image files. 
""" @@ -1811,23 +1852,35 @@ def parse_image(self, input_folder, write_to_db=True, save_history=True): for file in os.listdir(input_folder): - # increment and get new doc_id - if write_to_db_on == 1: - self.library.doc_ID = self.library.get_and_increment_doc_id() + # by default, will process all files with text file extensions + go_ahead = True - ip_output = ImageParser(self).process_ocr(input_folder, file) + if dupe_check: - if write_to_db_on == 1: - new_output, new_blocks, new_pages = self._write_output_to_db(ip_output,file,content_type="text", - file_type="ocr") - else: - new_output, new_blocks, new_pages = self._write_output_to_dict(ip_output,file, content_type="text", - file_type="ocr") + # basic_library_duplicate_check returns TRUE if it finds the file + if self.basic_library_duplicate_check(file): + go_ahead = False + + if go_ahead: + + # increment and get new doc_id + if write_to_db_on == 1: + self.library.doc_ID = self.library.get_and_increment_doc_id() + + ip_output = ImageParser(self).process_ocr(input_folder, file) + + if write_to_db_on == 1: + new_output, new_blocks, new_pages = self._write_output_to_db(ip_output,file,content_type="text", + file_type="ocr") + else: + new_output, new_blocks, new_pages = self._write_output_to_dict(ip_output,file, content_type="text", + file_type="ocr") + # return output value in either case output += new_output - docs_added += 1 - blocks_added += new_blocks - pages_added += new_pages + docs_added += 1 + blocks_added += new_blocks + pages_added += new_pages if write_to_db_on == 1: dummy = self.library.set_incremental_docs_blocks_images(added_docs=docs_added, added_blocks=blocks_added, @@ -1836,9 +1889,12 @@ def parse_image(self, input_folder, write_to_db=True, save_history=True): if save_history and write_to_db_on == 0: ParserState().save_parser_output(self.parser_job_id, self.parser_output) + if copy_to_library: + self.uploads(input_folder) + return output - def parse_voice(self, input_folder, write_to_db=True, save_history=True): + def parse_voice(self, input_folder, write_to_db=True, save_history=True, dupe_check=False,copy_to_library=False): """ Main entry point for parsing voice wav files. 
""" @@ -1869,23 +1925,35 @@ def parse_voice(self, input_folder, write_to_db=True, save_history=True): for file in os.listdir(input_folder): - # increment and get new doc_id - if write_to_db_on == 1: - self.library.doc_ID = self.library.get_and_increment_doc_id() + # by default, will process all files with text file extensions + go_ahead = True - vp_output = VoiceParser(self).add_voice_file(input_folder, file) + if dupe_check: - if write_to_db_on == 1: - new_output, new_blocks, new_pages = self._write_output_to_db(vp_output, file, content_type="text", - file_type="voice-wav") - else: - new_output, new_blocks, new_pages = self._write_output_to_dict(vp_output,file, content_type="text", - file_type="voice-wav") + # basic_library_duplicate_check returns TRUE if it finds the file + if self.basic_library_duplicate_check(file): + go_ahead = False + + if go_ahead: + + # increment and get new doc_id + if write_to_db_on == 1: + self.library.doc_ID = self.library.get_and_increment_doc_id() + + vp_output = VoiceParser(self).add_voice_file(input_folder, file) + + if write_to_db_on == 1: + new_output, new_blocks, new_pages = self._write_output_to_db(vp_output, file, content_type="text", + file_type="voice-wav") + else: + new_output, new_blocks, new_pages = self._write_output_to_dict(vp_output,file, content_type="text", + file_type="voice-wav") + # return output in either case output += new_output - docs_added += 1 - blocks_added += new_blocks - pages_added += new_pages + docs_added += 1 + blocks_added += new_blocks + pages_added += new_pages if write_to_db_on == 1: dummy = self.library.set_incremental_docs_blocks_images(added_docs=docs_added, added_blocks=blocks_added, @@ -1894,9 +1962,12 @@ def parse_voice(self, input_folder, write_to_db=True, save_history=True): if save_history and write_to_db_on == 0: ParserState().save_parser_output(self.parser_job_id, self.parser_output) + if copy_to_library: + self.uploads(input_folder) + return output - def parse_dialog(self, input_folder, write_to_db=True, save_history=True): + def parse_dialog(self, input_folder, write_to_db=True, save_history=True, dupe_check=False,copy_to_library=True): """ Main entry point for parsing AWS dialog transcripts. 
""" @@ -1927,53 +1998,66 @@ def parse_dialog(self, input_folder, write_to_db=True, save_history=True): for file in os.listdir(input_folder): - if file.endswith(".json"): + # by default, will process all files with text file extensions + go_ahead = True - # increment and get new doc_id - if write_to_db_on == 1: - self.library.doc_ID = self.library.get_and_increment_doc_id() + if dupe_check: - logging.info(f"update: dialog file - {file}") + # basic_library_duplicate_check returns TRUE if it finds the file + if self.basic_library_duplicate_check(file): + go_ahead = False - dp_parse_output = DialogParser(self).parse_aws_json_file_format(input_folder, file) + if go_ahead: - block_id = 0 + if file.endswith(".json"): - for i, blocks in enumerate(dp_parse_output): + # increment and get new doc_id + if write_to_db_on == 1: + self.library.doc_ID = self.library.get_and_increment_doc_id() - logging.info(f"update: dialog turn - {i} {blocks}") - - # iterate thru each block -> add to metadata - speaker_name = blocks["speaker_name"] + logging.info(f"update: dialog file - {file}") - meta = {"author": speaker_name, "modified_date": "", "created_date": "", "creator_tool": ""} + dp_parse_output = DialogParser(self).parse_aws_json_file_format(input_folder, file) - coords_dict = {"coords_x": blocks["start_time"], "coords_y": blocks["stop_time"], - "coords_cx": 0, "coords_cy": 0} + block_id = 0 - text_entry = blocks["text"] + for i, blocks in enumerate(dp_parse_output): - # conforming file format with full path of dialog intake path + logging.info(f"update: dialog turn - {i} {blocks}") - format_type = "aws_json" + # iterate thru each block -> add to metadata + speaker_name = blocks["speaker_name"] - new_entry = ("text", format_type, (1, 0), counter, "", "", input_folder + file, - text_entry, text_entry, "", "", text_entry, text_entry, "", text_entry, - "", "", "", "", "") + meta = {"author": speaker_name, "modified_date": "", "created_date": "", "creator_tool": ""} - counter += 1 - dialog_transcripts_added += 1 - conversation_turns += 1 + coords_dict = {"coords_x": blocks["start_time"], "coords_y": blocks["stop_time"], + "coords_cx": 0, "coords_cy": 0} - if write_to_db_on == 1: - output = self.add_create_new_record(self.library, new_entry, meta, coords_dict, - dialog_value="true") - self.library.block_ID += 1 - else: - entry_output = self.create_one_parsing_output_dict(block_id,new_entry,meta,coords_dict, - dialog_value="true") - block_id += 1 - self.parser_output.append(output) + text_entry = blocks["text"] + + # conforming file format with full path of dialog intake path + + format_type = "aws_json" + + new_entry = ("text", format_type, (1, 0), counter, "", "", input_folder + file, + text_entry, text_entry, "", "", text_entry, text_entry, "", text_entry, + "", "", "", "", "") + + counter += 1 + dialog_transcripts_added += 1 + conversation_turns += 1 + + if write_to_db_on == 1: + entry_output = self.add_create_new_record(self.library, new_entry, meta, coords_dict, + dialog_value="true") + self.library.block_ID += 1 + else: + entry_output = self.create_one_parsing_output_dict(block_id,new_entry,meta,coords_dict, + dialog_value="true") + block_id += 1 + self.parser_output.append(entry_output) + + # return output in either case output.append(entry_output) pages_added = dialog_transcripts_added @@ -1984,7 +2068,9 @@ def parse_dialog(self, input_folder, write_to_db=True, save_history=True): added_images=0, added_pages=pages_added) - self.uploads(input_folder) + # by default copies transcripts to upload folder + if 
copy_to_library: + self.uploads(input_folder) if save_history and write_to_db_on == 0: ParserState().save_parser_output(self.parser_job_id, self.parser_output) @@ -2188,7 +2274,7 @@ def parse_website(self, url_base, write_to_db=True, save_history=True, get_links return output - def uploads(self, tmp_dir): + def uploads(self, tmp_dir, overwrite=False): """ Utility method that handles 'uploads' of input files into library structure. """ @@ -2201,14 +2287,17 @@ def uploads(self, tmp_dir): return -1 upload_fp = self.library.file_copy_path - + library_files = os.listdir(upload_fp) files = os.listdir(tmp_dir) for x in range(0, len(files)): safe_name = self.prep_filename(files[x]) # exclude any folders if not os.path.isdir(os.path.join(tmp_dir,files[x])): - shutil.copy(os.path.join(tmp_dir, files[x]), os.path.join(upload_fp, files[x])) + + # will not over-write an existing file unless overwrite flag set + if overwrite or files[x] not in library_files: + shutil.copy(os.path.join(tmp_dir, files[x]), os.path.join(upload_fp, files[x])) return len(files) @@ -2240,6 +2329,10 @@ def prep_filename(self, fn, secure_name=True, prepend_string=None, postpend_stri def input_ingestion_comparison (self, file_list): + # shortcut if file_list is just empty + if len(file_list) < 1: + return [],[] + """ Compares input with parsed output to identify any rejected files. """ # simple approach - compares input file_list from ingestion 'work_order' with state of library collection @@ -2252,26 +2345,19 @@ def input_ingestion_comparison (self, file_list): return -1 found_list = [] - not_found_list = [] doc_fn_raw_list = CollectionRetrieval(self.library_name, account_name=self.account_name).get_distinct_list("file_source") - doc_fn_out = [] - for i, file in enumerate(doc_fn_raw_list): - doc_fn_out.append(file.split(os.sep)[-1]) - for i, input_file in enumerate(file_list): - found_file = -1 - for j, ingested_file in enumerate(doc_fn_out): + for i, file in enumerate(doc_fn_raw_list): + if file.split(os.sep)[-1] in file_list: + found_list.append(file.split(os.sep)[-1]) + # if found_list is equal length of file_list we don't need to look any further + if len(found_list) == len(file_list): + break - # need to confirm 'symmetrical' transformations, e.g., secure_filename and any prepend/postpend - if input_file == ingested_file: - found_file = 1 - found_list.append(input_file) - break - if found_file == -1: - not_found_list.append(input_file) + not_found_list = list(set(file_list) - set(found_list)) return found_list, not_found_list @@ -2726,11 +2812,11 @@ def parse_one_dialog(self, input_fp, input_fn, save_history=True): ext = input_fn.split(".")[-1].lower() - if ext == ".json": + if ext == "json": - output = DialogParser(self).parse_aws_json_file_format(input_fp, input_fn) + dp_output = DialogParser(self).parse_aws_json_file_format(input_fp, input_fn) - for i, blocks in enumerate(output): + for i, blocks in enumerate(dp_output): # iterate thru each block -> add to metadata speaker_name = blocks["speaker_name"] @@ -2950,6 +3036,143 @@ def basic_library_duplicate_check(self, fn): return in_library + def parse_csv_config(self,fp, fn, cols=None, mapping_dict=None): + + """ Designed for intake of a 'pseudo-db csv table' and will add rows to library with mapped keys. 
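+        Rows that do not match the expected column count are rejected and returned in the output summary.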
+ + Inputs: + -- csv folder path + csv file name + -- cols = # of expected column entries in each row of the CSV + -- mapping dict = assigns key names to columns, starting with 0 for first column + e.g., {"text": 4, "doc_ID": 2, "key1": 3} + + Requirements: + -- must have a "text" key in the mapping dictionary + -- optional doc_ID and block_ID - if found, will over-write the normal library indexes + -- all other keys will be saved as 'metadata' and added to the library block row in "special_field1" + + Note: this feature is currently only supported for Mongo - SQL DB support will follow. + """ + + # method requires Mongo DB and a library loaded in the Parser + if LLMWareConfig().get_config("collection_db") != "mongo" or not self.library: + raise LLMWareException(message="Parsing of a configured CSV file requires (a) use of MongoDB as " + "the text collection parsing database, and (b) a library object to " + "be connected to the parser state.") + + # if found in mapping dict, then will over-write + reserved_keys = ["text", "doc_ID", "block_ID"] + + rejected_rows = [] + ds = [] + + if not mapping_dict: + raise LLMWareException(message="Parsing of a configured CSV file requires a mapping dictionary so that " + "the table attributes can be properly mapped.") + + if not cols: + raise LLMWareException(message="Parsing of a configured CSV file requires a defined column structure and " + "a specified number of columns to ensure accurate mapping.") + + # will iterate through csv file + input_csv = os.path.join(fp, fn) + + import csv + record_file = open(input_csv, "r", encoding='utf-8-sig',errors='ignore') + c = csv.reader(record_file, dialect='excel', doublequote=False, delimiter=',') + output = [] + + # Should be OK to load in memory up to ~1M rows - beyond that, will need to implement iterator + + for lines in c: + output.append(lines) + record_file.close() + + added_row_count = 0 + total_row_count = 0 + added_doc_count = 0 + + for i, rows in enumerate(output): + + text = "" + doc_id = None + block_id = None + metadata = {} + + if len(rows) != cols: + bad_entry = {"index": i, "row": rows} + rejected_rows.append(bad_entry) + + else: + # confirmed that row has the correct number of entries + + for keys, values in mapping_dict.items(): + + if keys == "text": + if mapping_dict["text"] < len(rows): + text = rows[mapping_dict["text"]] + + if keys == "doc_ID": + if mapping_dict["doc_ID"] < len(rows): + doc_id = rows[mapping_dict["doc_ID"]] + + if keys == "block_ID": + if mapping_dict["block_ID"] < len(rows): + block_id = rows[mapping_dict["block_ID"]] + + if keys not in reserved_keys: + if values < len(rows): + metadata.update({keys:rows[values]}) + + if text.strip(): + + meta = {"author": "", "modified_date": "", "created_date": "", "creator_tool": ""} + coords_dict = {"coords_x": 0, "coords_y": 0, "coords_cx": 0, "coords_cy": 0} + + # conforming file format with full path of dialog intake path + + new_row_entry = ("text", "custom_csv", (1, 0), total_row_count, "", "", fn, + text, text, "", "", text, text, "", text, "", "", metadata, "", "") + + # set attributes custom + if doc_id: + try: + self.library.doc_ID = int(doc_id) + added_doc_count += 1 + except: + logging.warning("update: doc_ID expected to be integer - can not apply custom doc ID -" + "will use default library document increment") + + if block_id: + self.library.block_ID = block_id + else: + self.library.block_ID += 1 + + # write row to database + entry_output = self.add_create_new_record(self.library, + new_row_entry, + meta, + 
coords_dict, + dialog_value="false") + added_row_count += 1 + + total_row_count += 1 + + # update overall library counter at end of parsing + + if len(output) > 0: + + if added_doc_count == 0: + added_doc_count += 1 + + dummy = self.library.set_incremental_docs_blocks_images(added_docs=added_doc_count, + added_blocks=added_row_count, + added_images=0, added_pages=0) + + output = {"rows_added": len(ds), "rejected_count": len(rejected_rows), "rejected_rows": rejected_rows} + + return output + class WebSiteParser: @@ -2996,7 +3219,7 @@ def __init__(self, url_or_fp, link="/", save_images=True, reset_img_folder=False if from_file: # interpret url as file_path and file_name try: - html = open(url_or_fp, encoding='utf-8', errors='ignore').read() + html = open(url_or_fp, encoding='utf-8-sig', errors='ignore').read() bs = BeautifulSoup(html, features="lxml") self.html = bs.findAll() success_code = 1 @@ -3794,7 +4017,7 @@ def jsonl_file_handler (self, dir_fp,sample_file, key_list=None, interpret_as_ta # --based on key_list and interpret_as_table output = [] - my_file = open(os.path.join(dir_fp, sample_file), 'r', encoding='utf-8') + my_file = open(os.path.join(dir_fp, sample_file), 'r', encoding='utf-8-sig',errors='ignore') if not key_list: # as default, if no key_list, then look for "text" attribute in jsonl by default @@ -3824,7 +4047,7 @@ def text_file_handler (self, dir_fp, sample_file): """ Parse .txt file. """ - text_out = open(os.path.join(dir_fp,sample_file), encoding='utf-8', errors='ignore').read() + text_out = open(os.path.join(dir_fp,sample_file), "r", encoding='utf-8-sig', errors='ignore').read() # will chop up the long text into individual text chunks text_chunks = TextChunker(text_chunk=text_out, @@ -3861,7 +4084,7 @@ def csv_file_handler (self, dir_fp,sample_file, max_rows=100, interpret_as_table else: # chunk and split as a big piece of text - raw_csv = open(os.path.join(dir_fp,sample_file), encoding='utf-8', errors='ignore').read() + raw_csv = open(os.path.join(dir_fp,sample_file), "r", encoding='utf-8-sig', errors='ignore').read() # replace ',' & '\n' & '\r' with spaces text_out = re.sub("[,\n\r]", " ", raw_csv) @@ -3959,7 +4182,7 @@ def parse_aws_json_file_format(self, input_folder, fn_json): """ Parse AWS JSON file. """ - f = json.load(open(os.path.join(input_folder, fn_json), "r", encoding='utf-8')) + f = json.load(open(os.path.join(input_folder, fn_json), "r", encoding='utf-8-sig',errors='ignore')) # aws standard call transcript format: ["results"]["items"] -> key conversation elements to aggregate # note: we will need many more documents for testing diff --git a/llmware/prompts.py b/llmware/prompts.py index af98e1ac..1db0f423 100644 --- a/llmware/prompts.py +++ b/llmware/prompts.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,6 +11,15 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The prompts module implements the Prompt class, which manages the inference process. This includes +pre-processing, executing, and post-processing of inferences and tracking the state of related inferences, +e.g. in conversational language models. + +The module also implements the Sources, QualityCheck, and HumanInTheLoop classes. The Sources class packages +retrieved sources and appends them to a prompt. The QualityCheck class compares (i.e. 
verifies) the LLMs' +response against the context information. Finally, the HumanInTheLoop class provides mechanisms for reviews, +which include access to the prompt history for corrections, as well as user ratings. +""" # from bson import ObjectId @@ -876,7 +884,9 @@ def prompt_main (self, prompt, prompt_name=None, context=None, call_back_attempt output_dict.update({"evidence_metadata": [{"evidence_start_char":0, "evidence_stop_char": evidence_stop_char, "page_num": "NA", - "source_name": "NA"}]}) + "source_name": "NA", + "doc_id": "NA", + "block_id": "NA"}]}) if register_trx: self.register_llm_inference(output_dict,prompt_id,trx_dict) @@ -1238,7 +1248,7 @@ def __init__(self, prompt): "batch_stats.chars", "batch_stats.samples"] self.source_metadata = ["batch_source_num", "evidence_start_char", "evidence_stop_char", - "source_name", "page_num"] + "source_name", "page_num", "doc_id", "block_id"] def token_counter(self, text_sample): @@ -1346,6 +1356,18 @@ def package_source(self, retrieval_material, aggregate_source=True, add_to_promp # if can not retrieve from metadata, then set as default - page 1 page_num = 1 + if "doc_id" in samples[x]: + doc_id = samples[x]["doc_id"] + else: + # if can not retrieve from metadata, then set as default - doc_id 1 + doc_id = 1 + + if "block_id" in samples[x]: + block_id = samples[x]["block_id"] + else: + # if can not retrieve from metadata, then set as default - block_id 1 + block_id = 1 + # keep aggregating text batch up to the size of the target context_window for selected model if (t + token_counter) < self.prompt.context_window_size: @@ -1359,6 +1381,8 @@ def package_source(self, retrieval_material, aggregate_source=True, add_to_promp "evidence_stop_char": batch_char_len, "source_name": source_fn, "page_num": page_num, + "doc_id": doc_id, + "block_id": block_id, } batch_metadata.append(new_source) @@ -1434,6 +1458,8 @@ def package_source(self, retrieval_material, aggregate_source=True, add_to_promp "evidence_stop_char": len(samples[x]["text"]), "source_name": source_fn, "page_num": page_num, + "doc_id": doc_id, + "block_id": block_id, } batch_metadata = [new_source] @@ -1834,7 +1860,7 @@ def source_reviewer (self, response_dict): # min threshold to count as source -> % of total or absolute # of matching tokens if match_score > min_th or len(ev_match_tokens) > min_match_count: - matching_evidence_score.append([match_score, x, ev_match_tokens, evidence_tokens_tmp, evidence_metadata[x]["page_num"], evidence_metadata[x]["source_name"]]) + matching_evidence_score.append([match_score, x, ev_match_tokens, evidence_tokens_tmp, evidence_metadata[x]["page_num"], evidence_metadata[x]["source_name"], evidence_metadata[x]["doc_id"], evidence_metadata[x]["block_id"]]) mes = sorted(matching_evidence_score, key=lambda x: x[0], reverse=True) @@ -1850,6 +1876,8 @@ def source_reviewer (self, response_dict): page_num = mes[m][4] source_name = mes[m][5] + doc_id = mes[m][6] + block_id = mes[m][7] # text_snippet = "Page {}- ... ".format(str(page_num)) text_snippet = "" @@ -1877,7 +1905,7 @@ def source_reviewer (self, response_dict): # new_output = {"text": text_snippet, "match_score": mes[m][0],"source": evidence_metadata[mes[m][1]]} new_output = {"text": text_snippet, "match_score": mes[m][0], "source": source_name, - "page_num": page_num} + "page_num": page_num, "doc_id": doc_id, "block_id": block_id} sources_output.append(new_output) diff --git a/llmware/requirements.txt b/llmware/requirements.txt index 72916676..b701f54a 100644 --- a/llmware/requirements.txt +++ b/llmware/requirements.txt @@ -8,7 +8,7 @@ faiss-cpu==1.7.4 huggingface-hub==0.19.4 lxml==4.9.3 numpy>=1.23.2 -openai==0.27.7 +openai>=1.0 pdf2image==1.16.0 pymilvus==2.3.0 pymongo==4.5.0 @@ -24,10 +24,10 @@ Wikipedia-API==0.6.0 yfinance==0.2.28 psycopg-binary==3.1.17 psycopg==3.1.17 -pgvector==0.2.4 - +pgvector==0.2.4 +colorama==0.4.6 requests~=2.31.0 tqdm~=4.66.1 botocore~=1.27.96 -setuptools~=68.2.0 \ No newline at end of file +setuptools~=68.2.0 diff --git a/llmware/resources.py b/llmware/resources.py index e23de588..7856e167 100644 --- a/llmware/resources.py +++ b/llmware/resources.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,6 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The resources module implements the text index databases that are used in conjunction with the vector +databases. + +Currently, llmware supports MongoDB, Postgres, and SQLite as text index databases. +""" import platform @@ -701,8 +705,17 @@ def text_search_with_key_value_dict_filter(self, query, key_value_dict): def get_distinct_list(self, key): """Returns distinct list of items by key""" + # not using the distinct operation + # distinct can break due to the number of entries in the library + # to prevent this from happening, we use an aggregate, which produces a cursor rather than a single document + # we loop over the cursor and so avoid the 16mb document cap of the distinct operation + + group = self.collection.aggregate([{ "$group": {"_id": f'${key}',}}]) + + distinct_list = [] + for entry in group: + distinct_list.append(entry['_id']) - distinct_list = list(self.collection.distinct(key)) return distinct_list def filter_by_key_dict (self, key_dict): @@ -856,7 +869,7 @@ def unpack(self, results_cursor): new_dict.update({key: row[counter]}) counter += 1 else: - logging.info("update: pg_retriever - outputs not matching - %s - %s", counter, row[counter]) + logging.warning("update: pg_retriever - outputs not matching - %s", counter) output.append(new_dict) @@ -886,7 +899,7 @@ def unpack_search_result(self, results_cursor): new_dict.update({key: row[counter]}) counter += 1 else: - logging.info ("update: pg_retriever - outputs not matching - %s - %s ", counter, row[counter]) + logging.warning ("update: pg_retriever - outputs not matching - %s ", counter) output.append(new_dict) @@ -1007,11 +1020,7 @@ def filter_by_key(self, key, value): results = self.conn.cursor().execute(sql_query) if results: - - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) + output = self.unpack(results) self.conn.close() @@ -1146,17 +1155,16 @@ def filter_by_key_dict (self, key_dict): conditions_clause = " WHERE" for key, value in key_dict.items(): - conditions_clause += f" AND {key} = {value}" + conditions_clause += f" {key} = '{value}' AND " +
if conditions_clause.endswith(' AND '): + conditions_clause = conditions_clause[:-5] if len(conditions_clause) > len(" WHERE"): sql_query += conditions_clause + ";" results = self.conn.cursor().execute(sql_query) - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) + output = self.unpack(results) self.conn.close() @@ -1173,14 +1181,11 @@ def filter_by_key_value_range(self, key, value_range): value_range_str = value_range_str[:-2] value_range_str += ")" - sql_query = f"SELECT * from {self.library_name} WHERE '{key}' IN {value_range_str};" + sql_query = f"SELECT * from {self.library_name} WHERE {key} IN {value_range_str};" results = self.conn.cursor().execute(sql_query) - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) + output = self.unpack(results) self.conn.close() @@ -1190,14 +1195,11 @@ def filter_by_key_ne_value(self, key, value): """Filter by col (key) not equal to specified value""" - sql_query = f"SELECT * from {self.library_name} WHERE NOT '{key}' = {value};" + sql_query = f"SELECT * from {self.library_name} WHERE NOT {key} = {value};" results = self.conn.cursor().execute(sql_query) - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) + output = self.unpack(results) self.conn.close() @@ -1871,7 +1873,7 @@ def unpack(self, results_cursor): counter += 1 else: - logging.info("update: sqlite_retriever - outputs not matching - %s - %s", counter, len(row)) + logging.warning("update: sqlite_retriever - outputs not matching - %s ", counter) output.append(new_dict) @@ -1908,8 +1910,9 @@ def unpack_search_result(self, results_cursor): new_dict.update({key: row[counter]}) counter += 1 + else: - logging.info("update: sqlite_retriever - outputs not matching - %s - %s", counter, row[counter]) + logging.warning("update: sqlite_retriever - outputs not matching - %s", counter) output.append(new_dict) @@ -2026,26 +2029,7 @@ def filter_by_key(self, key, value): sql_query = f"SELECT rowid, * FROM {self.library_name} WHERE {key} = {value};" results = self.conn.cursor().execute(sql_query) - # lib_card = {} - - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) - - """ - if self.library_name == "library": - - # repackage library card - library_schema = LLMWareTableSchema.get_library_card_schema() - lib_card = {} - counter = 0 - results = list(results) - for keys in library_schema: - # print("update: keys / sql - ", keys, results[0][counter]) - lib_card.update({keys:results[0][counter]}) - counter += 1 - """ + output = self.unpack(results) self.conn.close() @@ -2174,17 +2158,17 @@ def filter_by_key_dict (self, key_dict): conditions_clause = " WHERE" for key, value in key_dict.items(): - conditions_clause += f" AND {key} = {value}" + conditions_clause += f" {key} = {value} AND " + + if conditions_clause.endswith(" AND "): + conditions_clause = conditions_clause[:-5] if len(conditions_clause) > len(" WHERE"): sql_query += conditions_clause + ";" results = self.conn.cursor().execute(sql_query) - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) + output = self.unpack(results) self.conn.close() @@ -2208,10 +2192,7 @@ def filter_by_key_value_range(self, key, value_range): results = self.conn.cursor().execute(sql_query) - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = 
self.unpack(results) + output = self.unpack(results) self.conn.close() @@ -2225,10 +2206,7 @@ def filter_by_key_ne_value(self, key, value): results = self.conn.cursor().execute(sql_query) - if self.text_retrieval: - output = self.unpack_search_result(results) - else: - output = self.unpack(results) + output = self.unpack(results) self.conn.close() diff --git a/llmware/retrieval.py b/llmware/retrieval.py index 97b78866..f8924cdf 100644 --- a/llmware/retrieval.py +++ b/llmware/retrieval.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,6 +11,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The retrieval module implements the Query class. + +The Query class executes queries against vector databases and depends on a Library object. +""" import logging diff --git a/llmware/setup.py b/llmware/setup.py index f6f69547..01258878 100644 --- a/llmware/setup.py +++ b/llmware/setup.py @@ -1,4 +1,3 @@ - # Copyright 2023 llmware # Licensed under the Apache License, Version 2.0 (the "License"); you @@ -12,6 +11,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The setup module implements the init process. + +The module implements the Setup class, which has one static method - load_sample_files. This method +creates the necessary directories if they do not exist and downloads the sample files from an AWS S3 instance. +""" import shutil diff --git a/llmware/status.py b/llmware/status.py index 886d7867..9299e839 100644 --- a/llmware/status.py +++ b/llmware/status.py @@ -1,3 +1,9 @@ +"""The status module implements the Status class, which provides an interface for callers to read and write +a status. + +The callers can, for example, be a UI or a SQL database. +""" + import time from threading import Thread diff --git a/llmware/util.py b/llmware/util.py index 71329574..ebfb245f 100644 --- a/llmware/util.py +++ b/llmware/util.py @@ -12,6 +12,12 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or # implied. See the License for the specific language governing # permissions and limitations under the License. +"""The util module implements general helper functions with the Utilities class, along with more specialized + classes. + +Among these more specialized classes are a whole word tokenizer (CorpTokenizer) and statistical +NLP functions that calculate relationships between key words and concepts in a library. +""" import csv @@ -1422,10 +1428,12 @@ def smooth_edge(self,starter,stopper): if x+1 == stopper or ord(self.text_chunk[x + 1]) in [32, 13, 10]: # exclude 'several edge cases where '.' is not a reliable sentence end - short_window = self.text_chunk[x-5:x-1] + short_window = self.text_chunk + if x > 5: + short_window = self.text_chunk[x-5:x-1] # (A) first edge case - "two periods close to each other", e.g., "x.y." - if "." not in short_window: + if "." not in short_window and short_window != "": # (B) second edge case - "period after number in list", e.g., "point 2."
if not 47 < ord(short_window[-1]) < 58: diff --git a/setup.py b/setup.py index fcf67cdf..13254f76 100644 --- a/setup.py +++ b/setup.py @@ -51,19 +51,16 @@ def glob_fix(package_name, glob): python_requires=">=3.9", zip_safe=True, install_requires=[ - 'ai21==1.0.3', - 'anthropic==0.3.11', 'beautifulsoup4==4.11.1', 'boto3==1.24.53', - 'cohere==4.1.3', 'datasets==2.15.0', 'faiss-cpu==1.7.4', 'huggingface-hub==0.19.4', 'lxml==4.9.3', 'numpy>=1.23.2', - 'openai==0.27.7', + 'openai>=1.0.0', 'pdf2image==1.16.0', - 'pymilvus==2.3.0', + 'pymilvus>=2.3.0', 'pymongo==4.5.0', 'pytesseract==0.3.10', 'sentence-transformers==2.2.2', @@ -77,7 +74,9 @@ def glob_fix(package_name, glob): 'yfinance==0.2.28', 'psycopg-binary==3.1.17', 'psycopg==3.1.17', - 'pgvector==0.2.4' + 'pgvector==0.2.4', + 'colorama==0.4.6', + 'einops==0.7.0' ], extras_require={ @@ -85,6 +84,7 @@ def glob_fix(package_name, glob): 'lancedb' :['lancedb==0.5.0'], 'qdrant': ['qdrant-client==1.7.0'], 'redis': ['redis==5.0.1'], - 'neo4j': ['neo4j==5.16.0'] + 'neo4j': ['neo4j==5.16.0'], + 'chromadb': ['chromadb==0.4.22'] }, ) diff --git a/tests/embeddings/test_all_embedding_dbs.py b/tests/embeddings/test_all_embedding_dbs.py index b35cdbe1..af79d4e4 100644 --- a/tests/embeddings/test_all_embedding_dbs.py +++ b/tests/embeddings/test_all_embedding_dbs.py @@ -35,6 +35,14 @@ def test_neo4j_embedding_and_query(): assert len(results) > 0 library.delete_library(confirm_delete=True) +def test_chromadb_embedding_and_query(): + sample_files_path = Setup().load_sample_files() + library = Library().create_new_library("test_embedding_neo4j") + library.add_files(os.path.join(sample_files_path,"SmallLibrary")) + results = generic_embedding_and_query(library, "chromadb") + assert len(results) > 0 + library.delete_library(confirm_delete=True) + def test_faiss_embedding_and_query(): sample_files_path = Setup().load_sample_files() library = Library().create_new_library("test_embedding_faiss") diff --git a/tests/embeddings/test_embeddings.py b/tests/embeddings/test_embeddings.py index 90f84063..720d9358 100644 --- a/tests/embeddings/test_embeddings.py +++ b/tests/embeddings/test_embeddings.py @@ -36,6 +36,18 @@ def test_neo4j_embedding_and_query(): assert len(results) > 0 library.delete_library(confirm_delete=True) +# TODO: add test for permanent mode. +# TODO: add test for client/server mode with password auth. +# TODO: add test for client/server mode with token auth. 
+# TODO: investigate why test fails when library name is set to test_embedding_chromadb +def test_chromadb_embedding_and_query(): + sample_files_path = Setup().load_sample_files() + library = Library().create_new_library("test_embedding_neo4j") # if this is set to chromadb, it fails + library.add_files(os.path.join(sample_files_path,"SmallLibrary")) + results = generic_embedding_and_query(library, "chromadb") + assert len(results) > 0 + library.delete_library(confirm_delete=True) + def test_faiss_embedding_and_query(): sample_files_path = Setup().load_sample_files() library = Library().create_new_library("test_embedding_faiss") diff --git a/wheel_archives/llmware-0.2.1-py3-none-any.whl b/wheel_archives/llmware-0.2.1-py3-none-any.whl new file mode 100644 index 00000000..8558f7ee Binary files /dev/null and b/wheel_archives/llmware-0.2.1-py3-none-any.whl differ diff --git a/wheel_archives/llmware-0.2.2-py3-none-any.whl b/wheel_archives/llmware-0.2.2-py3-none-any.whl new file mode 100644 index 00000000..97aad600 Binary files /dev/null and b/wheel_archives/llmware-0.2.2-py3-none-any.whl differ diff --git a/wheel_archives/llmware-0.2.3-py3-none-any.whl b/wheel_archives/llmware-0.2.3-py3-none-any.whl new file mode 100644 index 00000000..f1802f53 Binary files /dev/null and b/wheel_archives/llmware-0.2.3-py3-none-any.whl differ
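Note on the prompts.py changes above: the packaged evidence metadata and the fact-check output now carry `doc_id` and `block_id` alongside `source_name` and `page_num`, so a verified answer can be traced back to a specific block of a specific document. Below is a minimal sketch of how these fields might be consumed; the model name and file path are placeholders, and the `source_review` response key is assumed to follow llmware's documented fact-checking workflow rather than being confirmed by this diff.

```python
from llmware.prompts import Prompt

# load an inference model from the llmware catalog (placeholder model choice)
prompter = Prompt().load_model("llmware/bling-1b-0.1")

# attach a local document as a source (folder path and file name are placeholders)
prompter.add_source_document("/local/contracts/", "agreement.pdf", query="termination notice")

responses = prompter.prompt_with_source("What is the notice period for termination?")

# fact-check the responses against the packaged sources; each matched source
# should now carry doc_id and block_id alongside source name and page_num
for resp in prompter.evidence_check_sources(responses):
    for src in resp.get("source_review", []):
        print(src["source"], src["page_num"], src["doc_id"], src["block_id"])
```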
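Note on the MongoDB change in resources.py: `collection.distinct()` can fail once the set of distinct values no longer fits in a single 16MB BSON document, so `get_distinct_list` now runs a `$group` aggregation and streams the results from a cursor. A standalone pymongo sketch of the same pattern follows; the connection string, database, collection, and the `doc_ID` key are illustrative, not fixed by the diff.

```python
from pymongo import MongoClient

# connection string, database and collection names are placeholders
collection = MongoClient("mongodb://localhost:27017/")["llmware"]["my_library"]

def get_distinct_list(coll, key):
    """Collect the distinct values of `key` by grouping on it - results stream back
    through a cursor, so the 16MB single-document cap of distinct() does not apply."""
    cursor = coll.aggregate([{"$group": {"_id": f"${key}"}}])
    return [entry["_id"] for entry in cursor]

print(get_distinct_list(collection, "doc_ID"))
```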
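Note on the `filter_by_key_dict` changes in resources.py: the connector is now appended after each condition and the trailing ` AND ` is trimmed, so the generated SQL no longer reads `WHERE AND ...`. A small self-contained sketch of the corrected assembly (table and column names are illustrative):

```python
def build_filter_query(table_name, key_dict):
    """Assemble a SELECT with equality filters, trimming the trailing ' AND '."""
    sql_query = f"SELECT * FROM {table_name}"
    conditions_clause = " WHERE"
    for key, value in key_dict.items():
        conditions_clause += f" {key} = '{value}' AND "
    # strip the trailing connector so the clause ends on the last condition
    if conditions_clause.endswith(" AND "):
        conditions_clause = conditions_clause[:-5]
    if len(conditions_clause) > len(" WHERE"):
        sql_query += conditions_clause + ";"
    return sql_query

# -> "SELECT * FROM library WHERE doc_ID = '1' AND  content_type = 'text';"
print(build_filter_query("library", {"doc_ID": 1, "content_type": "text"}))
```

As in the diff, values are interpolated directly into the SQL string; a hardened version would typically use parameterized queries instead.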
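Note on the ChromaDB additions: setup.py gains a `chromadb` extra and the embedding tests add a ChromaDB path. Assuming the new option is wired into `install_new_embedding` the same way as the other vector databases, and that `chromadb` is available (for example via `pip install llmware[chromadb]` once this ships in a release, or `pip install chromadb==0.4.22`), a usage sketch with an illustrative library name and embedding model:

```python
import os
from llmware.library import Library
from llmware.setup import Setup

# pull down the llmware sample files (same helper the new test uses)
sample_files_path = Setup().load_sample_files()

# illustrative library name; the test notes that some names can trip up ChromaDB
library = Library().create_new_library("chromadb_demo_lib")
library.add_files(os.path.join(sample_files_path, "SmallLibrary"))

# "mini-lm-sbert" is an illustrative embedding model from the llmware catalog;
# vector_db="chromadb" assumes the new option plugs into install_new_embedding
# the same way as the other supported vector databases
library.install_new_embedding(embedding_model_name="mini-lm-sbert", vector_db="chromadb")
```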