10 changes: 1 addition & 9 deletions examples/basic/chat.py
@@ -41,7 +41,6 @@ def main(
     model: str = typer.Option("", "--model", "-m", help="model name"),
     no_stream: bool = typer.Option(False, "--nostream", "-ns", help="no streaming"),
     nocache: bool = typer.Option(False, "--nocache", "-nc", help="don't use cache"),
-    query: str = typer.Option("", "--query", "-q", help="initial user query or msg"),
     sys_msg: str = typer.Option(
         "You are a helpful assistant. Be concise in your answers.",
         "--sysmsg",
@@ -83,14 +82,7 @@ def main(
     )
     agent = ChatAgent(config)
     task = Task(agent)
-    # OpenAI models are ok with just a system msg,
-    # but in some scenarios, other (e.g. llama) models
-    # seem to do better when kicked off with a sys msg and a user msg.
-    # In those cases we may want to do task.run("hello") instead.
-    if query:
-        task.run(query)
-    else:
-        task.run()
+    task.run("hello")


if __name__ == "__main__":
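
Note: with this change, chat.py always kicks off the task with an initial
user message ("hello"), since some (e.g. llama) models respond better when
given a user message along with the system message. A minimal sketch of the
resulting pattern, assuming the same ChatAgent/Task API as in the diff above:

import langroid as lr

agent = lr.ChatAgent(lr.ChatAgentConfig())  # default model/config
task = lr.Task(agent)
task.run("hello")  # initial user message kicks off the chat loop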
150 changes: 150 additions & 0 deletions examples/basic/text-to-structured.py
@@ -0,0 +1,150 @@
"""
Function-calling example using a local LLM, with ollama.

"Function-calling" refers to the ability of the LLM to generate
a structured response, typically a JSON object, instead of a plain text response,
which is then interpreted by your code to perform some action.
This is also referred to in various scenarios as "Tools", "Actions" or "Plugins".
See more here: https://langroid.github.io/langroid/quick-start/chat-agent-tool/

Run like this (to run with llama-3.1-8b-instant via groq):

python3 examples/basic/text-to-structured.py -m groq/llama-3.1-8b-instant

Other models to try it with:
- ollama/qwen2.5-coder
- ollama/qwen2.5


See here for how to set up a Local LLM to work with Langroid:
https://langroid.github.io/langroid/tutorials/local-llm-setup/


"""

import os
from typing import List, Literal
import fire
import json
from rich.prompt import Prompt

from langroid.pydantic_v1 import BaseModel, Field
import langroid as lr
from langroid.utils.configuration import settings
from langroid.agent.tool_message import ToolMessage
from langroid.agent.tools.orchestration import ResultTool
import langroid.language_models as lm

# for best results:
DEFAULT_LLM = lm.OpenAIChatModel.GPT4o

os.environ["TOKENIZERS_PARALLELISM"] = "false"

# (1) Define the desired structure via Pydantic.
# The "Field" annotations are optional, and are included in the system message
# if provided, and help with generation accuracy.


class Wifi(BaseModel):
name: str


class HomeSettings(BaseModel):
App: List[str] = Field(..., description="List of apps found in text")
wifi: List[Wifi] = Field(..., description="List of wifi networks found in text")
brightness: Literal["low", "medium", "high"] = Field(
..., description="Brightness level found in text"
)


# (2) Define the Tool class for the LLM to use, to produce the above structure.
class HomeAutomationTool(lr.agent.ToolMessage):
"""Tool to extract Home Automation structure from text"""

request: str = "home_automation_tool"
purpose: str = """
To extract <home_settings> structure from a given text.
"""
home_settings: HomeSettings = Field(
..., description="Home Automation settings from given text"
)

    def handle(self) -> ResultTool:
"""Handle LLM's structured output if it matches HomeAutomationTool structure"""
print(
f"""
SUCCESS! Got Valid Home Automation Settings:
{json.dumps(self.home_settings.dict(), indent=2)}
"""
)
return ResultTool(settings=self.home_settings)

@classmethod
def examples(cls) -> List["ToolMessage"]:
# Used to provide few-shot examples in the system prompt
return [
(
"""
I have extracted apps Spotify and Netflix,
wifi HomeWifi, and brightness medium
""",
cls(
home_settings=HomeSettings(
App=["Spotify", "Netflix"],
wifi=[Wifi(name="HomeWifi")],
brightness="medium",
)
),
)
]
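
# For illustration (an assumption about Langroid's exact wire format, not
# executed): a valid LLM "call" of this tool is a JSON object matching the
# HomeAutomationTool fields above, e.g.:
#
#   {
#     "request": "home_automation_tool",
#     "home_settings": {
#       "App": ["Spotify", "Netflix"],
#       "wifi": [{"name": "HomeWifi"}],
#       "brightness": "medium"
#     }
#   }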


def app(
m: str = DEFAULT_LLM, # model
d: bool = False, # pass -d to enable debug mode (see prompts etc)
nc: bool = False, # pass -nc to disable cache-retrieval (i.e. get fresh answers)
):
settings.debug = d
settings.cache = not nc
    # (3) create LLM config
llm_cfg = lm.OpenAIGPTConfig(
chat_model=m or DEFAULT_LLM,
chat_context_length=4096, # set this based on model
max_output_tokens=100,
temperature=0.2,
stream=True,
timeout=45,
)

tool_name = HomeAutomationTool.default_value("request")
config = lr.ChatAgentConfig(
llm=llm_cfg,
system_message=f"""
You are an expert in extracting home automation settings from text.
When user gives a piece of text, use the TOOL `{tool_name}`
to present the extracted structured information.
""",
)

agent = lr.ChatAgent(config)

# (4) Enable the Tool for this agent --> this auto-inserts JSON instructions
# and few-shot examples (specified in the tool defn above) into the system message
agent.enable_message(HomeAutomationTool)

    # (5) Create the task, specialized to return a ResultTool object;
    # we run it in the loop below
task = lr.Task(agent, interactive=False)[ResultTool]

# set up a loop to extract Home Automation settings from text
while True:
text = Prompt.ask("[blue]Enter text (or q/x to exit)")
if not text or text.lower() in ["x", "q"]:
break
result = task.run(text)
assert isinstance(result, ResultTool)
assert isinstance(result.settings, HomeSettings)


if __name__ == "__main__":
fire.Fire(app)
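
Note: the `lr.Task(agent, interactive=False)[ResultTool]` indexing above
specializes the task so that `task.run(...)` returns a ResultTool instance
(rather than a ChatDocument). A minimal sketch of consuming the typed result
outside the loop, assuming the same API (the input text is hypothetical):

result = task.run("Open Spotify on HomeWifi, set brightness to low")
if isinstance(result, ResultTool):
    print(result.settings)  # a HomeSettings instance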
171 changes: 171 additions & 0 deletions examples/docqa/doc-aware-chat.py
@@ -0,0 +1,171 @@
"""
Single Agent for Doc-aware chat with user.

- user asks question
- LLM decides whether to:
- ask user for follow-up/clarifying information, or
- retrieve relevant passages from documents, or
- provide a final answer, if it has enough information from user and documents.

To reduce response latency, you can set `relevance_extractor_config=None`
in the DocChatAgentConfig, to turn off the relevance-extraction step,
which uses the LLM to extract verbatim relevant portions of retrieved chunks.

Run like this:

python3 examples/docqa/doc-aware-chat.py
"""

from typing import Optional, Any

from rich import print
from rich.prompt import Prompt
import os

from langroid import ChatDocument
from langroid.agent.special.doc_chat_agent import (
DocChatAgent,
DocChatAgentConfig,
)
import langroid.language_models as lm
from langroid.mytypes import Entity
from langroid.parsing.parser import ParsingConfig, PdfParsingConfig, Splitter
from langroid.agent.chat_agent import ChatAgent
from langroid.agent.task import Task
from langroid.agent.tools.orchestration import ForwardTool
from langroid.agent.tools.retrieval_tool import RetrievalTool
from langroid.utils.configuration import set_global, Settings
from fire import Fire

os.environ["TOKENIZERS_PARALLELISM"] = "false"


class DocAwareChatAgent(DocChatAgent):
def __init__(self, config: DocChatAgentConfig):
super().__init__(config)
self.enable_message(RetrievalTool)

def retrieval_tool(self, msg: RetrievalTool) -> str:
results = super().retrieval_tool(msg)
return f"""

RELEVANT PASSAGES:
=====
{results}
        =====


BASED on these RELEVANT PASSAGES, DECIDE:
- If this is sufficient to provide the user a final answer specific to
their situation, do so.
- Otherwise,
- ASK the user for more information to get a better understanding
of their situation or context, OR
- use this tool again to get more relevant passages.
"""

def llm_response(
self,
query: None | str | ChatDocument = None,
) -> Optional[ChatDocument]:
# override DocChatAgent's default llm_response
return ChatAgent.llm_response(self, query)

def handle_message_fallback(self, msg: str | ChatDocument) -> Any:
# we are here if there is no tool in the msg
if isinstance(msg, ChatDocument) and msg.metadata.sender == Entity.LLM:
# Any non-tool message must be meant for user, so forward it to user
return ForwardTool(agent="User")


def main(
debug: bool = False,
nocache: bool = False,
model: str = lm.OpenAIChatModel.GPT4o,
) -> None:
llm_config = lm.OpenAIGPTConfig(chat_model=model)
config = DocChatAgentConfig(
llm=llm_config,
n_query_rephrases=0,
hypothetical_answer=False,
relevance_extractor_config=None,
        # assistant_mode=True turns off standalone-query reformulation;
        # set it to False to enable that step.
assistant_mode=True,
n_neighbor_chunks=2,
parsing=ParsingConfig( # modify as needed
splitter=Splitter.TOKENS,
chunk_size=100, # aim for this many tokens per chunk
n_neighbor_ids=5,
overlap=20, # overlap between chunks
max_chunks=10_000,
# aim to have at least this many chars per chunk when
# truncating due to punctuation
min_chunk_chars=200,
discard_chunk_chars=5, # discard chunks with fewer than this many chars
n_similar_docs=5,
# NOTE: PDF parsing is extremely challenging, each library has its own
# strengths and weaknesses. Try one that works for your use case.
pdf=PdfParsingConfig(
# alternatives: "unstructured", "pdfplumber", "fitz"
library="fitz",
),
),
)

set_global(
Settings(
debug=debug,
cache=not nocache,
)
)

doc_agent = DocAwareChatAgent(config)
print("[blue]Welcome to the document chatbot!")
url = Prompt.ask("[blue]Enter the URL of a document")
doc_agent.ingest_doc_paths([url])

# For a more flexible/elaborate user doc-ingest dialog, use this:
# doc_agent.user_docs_ingest_dialog()

doc_task = Task(
doc_agent,
interactive=False,
name="DocAgent",
system_message=f"""
You are a DOCUMENT-AWARE-GUIDE, but you do NOT have direct access to documents.
Instead you can use the `retrieval_tool` to get passages from the documents
that are relevant to a certain query or search phrase or topic.
DO NOT ATTEMPT TO ANSWER THE USER'S QUESTION WITHOUT RETRIEVING RELEVANT
PASSAGES FROM THE DOCUMENTS. DO NOT use your own existing knowledge!!
Everything you tell the user MUST be based on the documents.

The user will ask you a question that you will NOT be able to answer
immediately, because you are MISSING some information about:
- the user or their context or situation, etc
- the documents relevant to the question

At each turn you must decide among these possible ACTIONS:
- use the `{RetrievalTool.name()}` to get more relevant passages from the
documents, OR
- ANSWER the user if you think you have enough information
from the user AND the documents, to answer the question.

You can use the `{RetrievalTool.name()}` multiple times to get more
relevant passages, if you think the previous ones were not sufficient.

REMEMBER - your goal is to be VERY HELPFUL to the user; this means
    you should NOT OVERWHELM them by throwing a lot of information at them
    and asking them to figure things out. Instead, you must GUIDE them
    by asking SIMPLE QUESTIONS, ONE at a time, and finally provide them
a clear, DIRECTLY RELEVANT answer that is specific to their situation.
""",
)

print("[cyan]Enter x or q to quit, or ? for evidence")

doc_task.run("Can you help me with some questions?")


if __name__ == "__main__":
Fire(main)
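
Note: since main() is exposed via Fire, its keyword arguments map to CLI
flags (Fire's standard behavior), so you can run with a different model,
for example:

python3 examples/docqa/doc-aware-chat.py --model groq/llama-3.1-8b-instant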