diff --git a/docs/AGENTC_PACKAGE_REFERENCE.md b/docs/AGENTC_PACKAGE_REFERENCE.md
new file mode 100644
index 0000000..61cb5aa
--- /dev/null
+++ b/docs/AGENTC_PACKAGE_REFERENCE.md
@@ -0,0 +1,1282 @@
+Package Documentation
+agentc Package
+pydantic settings agentc.catalog.Catalog[source]
+A provider of indexed "agent building blocks" (e.g., tools, prompts, spans...).
+
+Class Description
+A Catalog instance can be configured in three ways (listed in order of precedence):
+
+Directly (as arguments to the constructor).
+
+Via the environment (through environment variables).
+
+Via a .env configuration file.
+
+In most cases, you'll want to configure your catalog via a .env file. This style of configuration means you can instantiate a Catalog instance as such:
+
+import agentc
+catalog = agentc.Catalog()
+Some custom configurations can only be specified via the constructor (e.g., secrets). For example, if your secrets are managed by some external service (defined below as my_secrets_manager), you can specify them as such:
+
+import os
+
+import agentc
+catalog = agentc.Catalog(secrets={
+    "CB_CONN_STRING": os.getenv("CB_CONN_STRING"),
+    "CB_USERNAME": os.getenv("CB_USERNAME"),
+    "CB_PASSWORD": my_secrets_manager.get("THE_CB_PASSWORD"),
+    "CB_CERTIFICATE": my_secrets_manager.get("PATH_TO_CERT"),
+})
+Fields:
+Span(
+name,
+session=None,
+state=None,
+iterable=False,
+blacklist=None,
+**kwargs
+)[source]
+A factory method to initialize a Span (more specifically, a GlobalSpan) instance.
+
+Parameters:
+name (str) -- Name to bind to each message logged within this span.
+
+session (str) -- The run that this tree of spans is associated with. By default, this is a UUID.
+
+state (Any) -- A JSON-serializable object that will be logged on entering and exiting this span.
+
+iterable (bool) -- Whether this new span should be iterable. By default, this is False.
+
+blacklist (set[Kind]) -- A set of content types to skip logging. By default, there is no blacklist.
+
+kwargs -- Additional keyword arguments to pass to the Span constructor.
+
+Return type:
+Span
+
+find(
+kind,
+query=None,
+name=None,
+annotations=None,
+catalog_id='__LATEST__',
+limit=1
+)[source]
+Return a list of tools or prompts based on the specified search criteria.
+
+Method Description
+This method is the programmatic equivalent of the agentc find command. Whether the results are fetched from the local catalog or the remote catalog depends on the configuration of this agentc_core.catalog.Catalog instance.
+
+For example, to find a tool named "get_sentiment_of_text", you would author:
+
+results = catalog.find(kind="tool", name="get_sentiment_of_text")
+sentiment_score = results[0].func("I love this product!")
+To find a prompt named "summarize_article_instructions", you would author:
+
+results = catalog.find(kind="prompt", name="summarize_article_instructions")
+prompt_for_agent = results[0].content
+Parameters:
+kind (Literal['tool', 'prompt']) -- The type of item to search for, either 'tool' or 'prompt'.
+
+query (str) -- A query string (natural language) to search the catalog with.
+
+name (str) -- The specific name of the catalog entry to search for.
+
+annotations (str) -- An annotation query string in the form of KEY="VALUE" (AND|OR KEY="VALUE")*.
+
+catalog_id (str) -- The snapshot version to find the tools for. By default, we use the latest snapshot.
+
+limit (int | None) -- The maximum number of results to return (ignored if name is specified).
+ +Returns: +One of the following: + +None if no results are found by name. + +"tools" if kind is "tool" (see find_tools() for details). + +"prompts" if kind is "prompt" (see find_prompts() for details). + +Return type: +list[ToolResult] | list[PromptResult] | ToolResult | PromptResult | None + +find_prompts( +query=None, +name=None, +annotations=None, +catalog_id='__LATEST__', +limit=1 +)[source] +Return a list of prompts based on the specified search criteria. + +Parameters: +query (str) -- A query string (natural language) to search the catalog with. + +name (str) -- The specific name of the catalog entry to search for. + +annotations (str) -- An annotation query string in the form of KEY="VALUE" (AND|OR KEY="VALUE")*. + +catalog_id (str) -- The snapshot version to find the tools for. By default, we use the latest snapshot. + +limit (int | None) -- The maximum number of results to return (ignored if name is specified). + +Returns: +A list of Prompt instances, with the following attributes: + +content (str | dict): The content to be served to the model. + +tools (list): The list containing the tool functions associated with prompt. + +output (dict): The output type of the prompt, if it exists. + +meta (RecordDescriptor): The metadata associated with the prompt. + +Return type: +list[PromptResult] | PromptResult | None + +find_tools( +query=None, +name=None, +annotations=None, +catalog_id='__LATEST__', +limit=1 +)[source] +Return a list of tools based on the specified search criteria. + +Parameters: +query (str) -- A query string (natural language) to search the catalog with. + +name (str) -- The specific name of the catalog entry to search for. + +annotations (str) -- An annotation query string in the form of KEY="VALUE" (AND|OR KEY="VALUE")*. + +catalog_id (str) -- The snapshot version to find the tools for. By default, we use the latest snapshot. + +limit (int | None) -- The maximum number of results to return (ignored if name is specified). + +Returns: +By default, a list of Tool instances with the following attributes: + +func (typing.Callable): A Python callable representing the function. + +meta (RecordDescriptor): The metadata associated with the tool. + +input (dict): The argument schema (in JSON schema) associated with the tool. + +If a tool_decorator is present, this method will return a list of objects decorated accordingly. + +Return type: +list[ToolResult] | ToolResult | None + +property version: VersionDescriptor +The version of the catalog currently being served (i.e., the latest version). + +Returns: +An agentc_core.version.VersionDescriptor instance. + +pydantic model agentc.span.Span[source] +A structured logging context for agent activity. + +Class Description +A Span instance belongs to a tree of other Span instances, whose root is a GlobalSpan instance that is constructed using the Catalog.Span() method. + +Attention + +Spans should never be created directly (via constructor), as logs generated by the span must always be associated with a catalog version and some application structure. + +Below we illustrate how a tree of Span instances is created: + +import agentc +catalog = agentc.Catalog() +root_span = catalog.Span(name="root") +child_1_span = root_span.new(name="child_1") +child_2_span = root_span.new(name="child_2") +In practice, you'll likely use different spans for different agents and/or different tasks. 
Below we give a small LangGraph example using spans for different agents: + +import agentc +import langgraph.graph + +catalog = agentc.Catalog() +root_span = catalog.Span(name="flight_planner") + +def front_desk_agent(...): + with root_span.new(name="front_desk_agent") as front_desk_span: + ... + +def route_finding_agent(...): + with root_span.new(name="route_finding_agent") as route_finding_span: + ... + +workflow = langgraph.graph.StateGraph() +workflow.add_node("front_desk_agent", front_desk_agent) +workflow.add_node("route_finding_agent", route_finding_agent) +workflow.set_entry_point("front_desk_agent") +workflow.add_edge("front_desk_agent", "route_finding_agent") +... +Fields: +blacklist (set[agentc_core.activity.models.content.Kind]) + +iterable (bool | None) + +kwargs (dict[str, Any] | None) + +logger (Callable[[...], agentc_core.activity.models.log.Log]) + +name (str) + +parent (agentc_core.activity.span.Span) + +state (Any) + +field blacklist: set[Kind] [Optional] +List of content types to filter. + +Validated by: +_initialize_iterable_logger + +field iterable: bool | None = False +Flag to indicate whether or not this span should be iterable. + +Validated by: +_initialize_iterable_logger + +field kwargs: dict[str, Any] | None = None +Annotations to apply to all messages logged within this span. + +Validated by: +_initialize_iterable_logger + +field name: str [Required] +Name to bind to each message logged within this span. + +Validated by: +_initialize_iterable_logger + +field parent: Span = None +Parent span of this span (i.e., the span that had new() called on it). + +Validated by: +_initialize_iterable_logger + +field state: Any = None +A JSON-serializable object that will be logged on entering and exiting this span. + +Validated by: +_initialize_iterable_logger + +pydantic model Identifier[source] +The unique identifier for a Span. + +Class Description +A Span is uniquely identified by two parts: + +an application-defined multipart name and... + +a session identifier unique to each run of the application. + +Fields: +name (list[str]) + +session (str) + +field name: list[str] [Required] +The name of the Span. + +Names are built up from the root of the span tree to the leaf, thus the first element of name is the name of the root and the last element is the name of the current span (i.e., the leaf). + +field session: str [Required] +The session identifier of the Span. + +Sessions must be unique to each run of the application. By default, we generate these as UUIDs (see GlobalSpan.session). + +enter()[source] +Record a BeginContent log entry for this span. + +Method Description +The enter() method is to denote the start of the span (optionally logging the incoming state if specified). This method is also called when entering the span using the with statement. In the example below, enter() is called (implicitly). + +import agentc + +catalog = agentc.Catalog() +incoming_state = {"flights": []} +with catalog.Span(name="flight_planner", state=incoming_state) as span: + flight_planner_implementation() +On entering the context, one log is generated possessing the content below: + +{ "kind": "begin", "state": {"flights": []} } +Return type: +Self + +exit()[source] +Record a EndContent log entry for this span. + +Method Description +The exit() method is to denote the end of the span (optionally logging the outgoing state if specified). This method is also called when exiting the span using the with statement successfully. In the example below, exit() is called (implicitly). 
+
+import agentc
+
+catalog = agentc.Catalog()
+incoming_state = {"flights": []}
+with catalog.Span(name="flight_planner", state=incoming_state) as span:
+    _ = flight_planner_implementation(...)
+    incoming_state["flights"] = [{"flight_number": "AA123", "status": "on_time"}]
+On exiting the context, one log is generated possessing the content below:
+
+{ "kind": "end", "state": {"flights": [{"flight_number": "AA123", "status": "on_time"}]} }
+Note
+
+The state of the span must be JSON-serializable and must be mutated in-place. If you are working with immutable state objects, you must set the state attribute before exiting the span (i.e., before the with statement exits or before calling exit() explicitly).
+
+import agentc
+
+catalog = agentc.Catalog()
+immutable_incoming_state = {"flights": []}
+with catalog.Span(name="flight_planner", state=immutable_incoming_state) as span:
+    _ = flight_planner_implementation(...)
+    span.state = {"flights": [{"flight_number": "AA123", "status": "on_time"}]}
+log(
+content,
+**kwargs
+)[source]
+Accept some content (with optional annotations specified by kwargs) and generate a corresponding log entry.
+
+Method Description
+The heart of the Span class is the log() method. This method is used to log events that occur within the span. Users can capture events that occur in popular frameworks like LangChain and LlamaIndex using our helper packages (see agentc_langchain, agentc_langgraph, and agentc_llamaindex) but must use those packages in conjunction with this log() method to capture the full breadth of their application's activity. See here for a list of all available log content types.
+
+Users can also use Python's [] syntax to write arbitrary JSON-serializable content as a key-value (KeyValueContent) pair. This is useful for logging arbitrary data like metrics during evaluations. Below, we illustrate a system-wide evaluation suite that uses this [] syntax:
+
+import my_agent_app
+import my_output_evaluator
+import agentc
+
+catalog = agentc.Catalog()
+evaluation_span = catalog.Span(name="evaluation_suite")
+with open("my-evaluation-suite.json") as fp:
+    for i, line in enumerate(fp):
+        with evaluation_span.new(name=f"evaluation{i}") as span:
+            output = my_agent_app(span)
+            span["positive_sentiment"] = my_output_evaluator.positive(output)
+            span.log(
+                content={
+                    "kind": "key-value",
+                    "key": "negative_sentiment",
+                    "value": my_output_evaluator.negative(output)
+                },
+                alpha="SDGD"
+            )
+All keywords passed to the log() method will be applied as annotations to the log entry. In the example above, the alpha annotation is applied only to the second log entry. For span-wide annotations, use the kwargs attribute on new().
+
+Parameters:
+content (SystemContent | ToolCallContent | ToolResultContent | ChatCompletionContent | RequestHeaderContent | UserContent | AssistantContent | BeginContent | EndContent | EdgeContent | KeyValueContent) -- The content to log.
+
+kwargs -- Additional annotations to apply to the log.
+
+logs()[source]
+Return the logs generated by the tree of Span nodes rooted from this Span instance.
+
+Method Description
+The logs() method returns an iterable of all logs generated within the span. This method is also called (implicitly) when iterating over the span (e.g., using a for loop). 
To use this method, you must set the iterable attribute to True when instantiating the span:
+
+import agentc
+
+catalog = agentc.Catalog()
+span = catalog.Span(name="flight_planner", iterable=True)
+for log in span:
+    match log.content.kind:
+        case "begin":
+            ...
+Tip
+
+Generally, this method should only be used for debugging purposes. This method will keep all logs generated by the span in memory. To perform efficient aggregate analysis of your logs, consider querying the agent_activity.logs collection in your Couchbase cluster using SQL++ instead.
+
+Return type:
+Iterable[Log]
+
+new(
+name,
+state=None,
+iterable=False,
+blacklist=None,
+**kwargs
+)[source]
+Create a new span under the current Span.
+
+Method Description
+Spans require a name and a session (see identifier). Aside from name, state, and iterable, you can also pass additional keywords that will be applied as annotations to each log() call within a span. As an example, the following code illustrates the use of kwargs to add a span-wide "alpha" annotation:
+
+import agentc
+catalog = agentc.Catalog()
+root_span = catalog.Span(name="flight_planner")
+with root_span.new(name="find_airports_task", alpha="SDGD") as child_span:
+    child_span.log(content=agentc.span.UserContent(value="Hello, world!"), beta="412d")
+The example code above will generate the three logs below (for brevity, we only show the content and annotations fields):
+
+{ "content": { "kind": "begin" }, "annotations": { "alpha": "SDGD"} }
+{ "content": { "kind": "user", "value": "Hello, world!" },
+  "annotations": { "alpha": "SDGD", "beta": "412d" } }
+{ "content" : { "kind": "end" }, "annotations": { "alpha": "SDGD" } }
+Parameters:
+name (str) -- The name of the span.
+
+state (Any) -- The starting state of the span. This will be recorded upon entering and exiting the span.
+
+iterable (bool) -- Whether this new span should be iterable. By default, this is False.
+
+blacklist (set[Kind]) -- A set of content types to skip logging. By default, there is no blacklist.
+
+kwargs -- Additional annotations to apply to the span.
+
+Returns:
+A new Span instance.
+
+Return type:
+Span
+
+property identifier: Identifier
+A unique identifier for this span.
+
+Integration Packages
+LangChain
+class agentc_langchain.chat.Callback(
+span,
+tools=None,
+output=None
+)[source]
+A callback that will log all LLM calls using the given span as the root.
+
+Class Description
+This class is a callback that will log all LLM calls using the given span as the root. This class will record all messages used to generate ChatCompletionContent and ToolCallContent. ToolResultContent is not logged by this class, as it is not generated by a BaseChatModel instance.
+
+Below, we illustrate a minimal example of how to use this class:
+
+import langchain_openai
+import langchain_core.messages
+import agentc_langchain.chat
+import agentc
+
+# Create a span to bind to the chat model messages.
+catalog = agentc.Catalog()
+root_span = catalog.Span(name="root_span")
+
+# Create a chat model.
+chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o", callbacks=[])
+
+# Create a callback with the appropriate span, and attach it to the chat model. 
+my_agent_span = root_span.new(name="my_agent") +callback = agentc_langchain.chat.Callback(span=my_agent_span) +chat_model.callbacks.append(callback) +result = chat_model.invoke(messages=[ + langchain_core.messages.SystemMessage(content="Hello, world!") +]) +To record the exact tools and output used by the chat model, you can pass in the tools and output to the agentc_langchain.chat.Callback constructor. For example: + +import langchain_openai +import langchain_core.messages +import langchain_core.tools +import agentc_langchain.chat +import agentc + +# Create a span to bind to the chat model messages. +catalog = agentc.Catalog() +root_span = catalog.Span(name="root_span") + +# Create a chat model. +chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o", callbacks=[]) + +# Grab the correct tools and output from the catalog. +my_agent_prompt = catalog.find("prompt", name="my_agent") +my_agent_tools = [ + langchain_core.tools.StructuredTool.from_function(tool.func) for tool in my_agent_prompt.tools +] +my_agent_output = my_agent_prompt.output + +# Create a callback with the appropriate span, tools, and output, and attach it to the chat model. +my_agent_span = root_span.new(name="my_agent") +callback = agentc_langchain.chat.Callback( + span=my_agent_span, + tools=my_agent_tools, + output=my_agent_output +) +chat_model.callbacks.append(callback) +result = chat_model.with_structured_output(my_agent_output).invoke(messages=[ + langchain_core.messages.SystemMessage(content=my_agent_prompt.content) +]) +Parameters: +span (Span) + +tools (list[Tool]) + +output (tuple | dict) + +agentc_langchain.cache.cache( +chat_model, +kind, +embeddings=None, +options=None, +**kwargs +)[source] +A function to attach a Couchbase-backed exact or semantic cache to a ChatModel. + +Function Description +This function is used to set the .cache property of LangChain ChatModel instances. For all options related to this Couchbase-backed cache, see CacheOptions. + +Below, we illustrate a minimal working example of how to use this function to store and retrieve LLM responses via exact prompt matching: + +import langchain_openai +import agentc_langchain.cache + +chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o") +caching_chat_model = agentc_langchain.cache.cache( + chat_model=chat_model, + kind="exact", + create_if_not_exists=True +) + +# Response #2 is served from the cache. +response_1 = caching_chat_model.invoke("Hello there!") +response_2 = caching_chat_model.invoke("Hello there!") +To use this function to store and retrieve LLM responses via semantic similarity, use the kind="semantic" argument with an langchain_core.embeddings.Embeddings instance: + +import langchain_openai +import agentc_langchain.cache + +chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o") +embeddings = langchain_openai.OpenAIEmbeddings(model="text-embedding-3-small") +caching_chat_model = agentc_langchain.cache.cache( + chat_model=chat_model, + kind="semantic", + embeddings=embeddings, + create_if_not_exists=True +) + +# Response #2 is served from the cache. +response_1 = caching_chat_model.invoke("Hello there!") +response_2 = caching_chat_model.invoke("Hello there!!") +By default, the Couchbase initialization of the cache is separate from the cache's usage (storage and retrieval). To explicitly initialize the cache yourself, use the initialize() method. 
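+
+If the cache should live in a different Couchbase location than your Agent Catalog settings, you can also build the options object explicitly and hand it to cache(). Below is a minimal sketch of this style of configuration; the connection values and TTL are illustrative placeholders, not required settings:
+
+import datetime
+import langchain_openai
+import agentc_langchain.cache
+
+# All connection values below are placeholders -- substitute your own cluster details.
+options = agentc_langchain.cache.CacheOptions(
+    conn_string="couchbase://localhost",
+    username="Administrator",
+    password="password",
+    bucket="travel-sample",
+    ttl=datetime.timedelta(hours=1),
+    create_if_not_exists=True,
+)
+chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o")
+caching_chat_model = agentc_langchain.cache.cache(
+    chat_model=chat_model,
+    kind="exact",
+    options=options,
+)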
+ +See also + +This method uses the langchain_couchbase.cache.CouchbaseCache and langchain_couchbase.cache.CouchbaseSemanticCache classes from the langchain_couchbase package. See here for more details. + +Parameters: +chat_model (BaseChatModel) -- The LangChain chat model to cache responses for. + +kind (Literal['exact', 'semantic']) -- The type of cache to attach to the chat model. + +embeddings (Embeddings) -- The embeddings to use when attaching a 'semantic' cache to the chat model. + +options (CacheOptions) -- The options to use when attaching a cache to the chat model. + +kwargs -- Keyword arguments to be forwarded to a CacheOptions constructor (ignored if options is present). + +Returns: +The same LangChain chat model that was passed in, but with a cache attached. + +Return type: +BaseChatModel + +agentc_langchain.cache.initialize( +kind, +options=None, +embeddings=None, +**kwargs +)[source] +A function to create the collections and/or indexes required to use the cache() function. + +Function Description +This function is a helper function for creating the default collection (and index, in the case of kind="semantic") required for the cache() function. Below, we give a minimal working example of how to use this function to create a semantic cache backed by Couchbase. + +import langchain_openai +import agentc_langchain.cache + +embeddings = langchain_openai.OpenAIEmbeddings(model="text-embedding-3-small") +agentc_langchain.cache.initialize( + kind="semantic", + embeddings=embeddings +) + +chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o") +caching_chat_model = agentc_langchain.cache.cache( + chat_model=chat_model, + kind="semantic", + embeddings=embeddings, +) + +# Response #2 is served from the cache. +response_1 = caching_chat_model.invoke("Hello there!") +response_2 = caching_chat_model.invoke("Hello there!!") +Parameters: +kind (Literal['exact', 'semantic']) -- The type of cache to attach to the chat model. + +embeddings (Embeddings) -- The embeddings to use when attaching a 'semantic' cache to the chat model. + +options (CacheOptions) -- The options to use when attaching a cache to the chat model. + +kwargs -- Keyword arguments to be forwarded to a CacheOptions constructor (ignored if options is present). + +Return type: +None + +pydantic settings agentc_langchain.cache.CacheOptions[source] +Config: +env_prefix: str = AGENT_CATALOG_LANGCHAIN_CACHE_ + +env_file: str = .env + +Fields: +bucket (str | None) + +collection (str | None) + +conn_root_certificate (str | pathlib.Path | None) + +conn_string (str | None) + +create_if_not_exists (bool | None) + +ddl_retry_attempts (int | None) + +ddl_retry_wait_seconds (float | None) + +index_name (str | None) + +password (pydantic.types.SecretStr | None) + +scope (str | None) + +score_threshold (float | None) + +ttl (datetime.timedelta | None) + +username (str | None) + +field bucket: str | None = None +The name of the Couchbase bucket hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +field collection: str | None = 'langchain_llm_cache' +The name of the Couchbase collection hosting the cache. + +This field is optional and defaults to langchain_llm_cache. + +Validated by: +_pull_cluster_from_agent_catalog + +field conn_root_certificate: str | Path | None = None +Path to the root certificate file for the Couchbase cluster. + +This field is optional and only required if the Couchbase cluster is using a self-signed certificate. 
+ +Validated by: +_pull_cluster_from_agent_catalog + +field conn_string: str | None = None +The connection string to the Couchbase cluster hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +field create_if_not_exists: bool | None = False +Create the required collections and/or indexes if they do not exist. + +When raised (i.e., this value is set to True), the collections and indexes will be created if they do not exist. Lower this flag (set this to False) to instead raise an error if the collections & indexes do not exist. + +Validated by: +_pull_cluster_from_agent_catalog + +field ddl_retry_attempts: int | None = 3 +Maximum number of attempts to retry DDL operations. + +This value is only used on setup (i.e., the first time the cache is requested). If the number of attempts is exceeded, the command will fail. By default, this value is 3 attempts. + +Validated by: +_pull_cluster_from_agent_catalog + +field ddl_retry_wait_seconds: float | None = 5 +Wait time (in seconds) between DDL operation retries. + +This value is only used on setup (i.e., the first time the cache is requested). By default, this value is 5 seconds. + +Validated by: +_pull_cluster_from_agent_catalog + +field index_name: str | None = 'langchain_llm_cache_index' +The name of the Couchbase FTS index used to query the cache. + +This field will only be used if the cache is of type semantic. If the cache is of type semantic and this field is not specified, this field defaults to langchain_llm_cache_index. + +Validated by: +_pull_cluster_from_agent_catalog + +field password: SecretStr | None = None +Password associated with the Couchbase instance hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +field scope: str | None = 'agent_activity' +The name of the Couchbase scope hosting the cache. + +This field is optional and defaults to agent_activity. + +Validated by: +_pull_cluster_from_agent_catalog + +field score_threshold: float | None = 0.8 +The score threshold used to quantify what constitutes as a "good" match. + +This field will only be used if the cache is of type semantic. If the cache is of type semantic and this field is not specified, this field defaults to 0.8. + +Validated by: +_pull_cluster_from_agent_catalog + +field ttl: timedelta | None = None +The time-to-live (TTL) for the cache. + +When specified, the cached documents will be automatically removed after the specified duration. This field is optional and defaults to None. + +Validated by: +_pull_cluster_from_agent_catalog + +field username: str | None = None +Username associated with the Couchbase instance hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +LangGraph +class agentc_langgraph.tool.ToolNode( +span, +*args, +**kwargs +)[source] +A tool node that logs tool results to a span. + +Class Description +This class will record the results of each tool invocation to the span that is passed to it (ultimately generating ToolResultContent log entries). This class does not log tool calls (i.e., ToolCallContent log entries) as these are typically logged with ChatCompletionContent log entries. + +Below, we illustrate a minimal working example of how to use this class with agentc_langchain.chat.Callback to record ChatCompletionContent log entries, ToolCallContent log entries, and ToolResultContent log entries. 
+ +import langchain_openai +import langchain_core.tools +import langgraph.prebuilt +import agentc_langchain.chat +import agentc_langgraph +import agentc + +# Create a span to bind to the chat model messages. +catalog = agentc.Catalog() +root_span = catalog.Span(name="root_span") + +# Create a chat model. +chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o", callbacks=[]) + +# Create a callback with the appropriate span, and attach it to the chat model. +my_agent_span = root_span.new(name="my_agent") +callback = agentc_langchain.chat.Callback(span=my_agent_span) +chat_model.callbacks.append(callback) + +# Grab the correct tools and output from the catalog. +my_agent_prompt = catalog.find("prompt", name="my_agent") +my_agent_tools = agentc_langgraph.tool.ToolNode( + span=my_agent_span, + tools=[ + langchain_core.tools.tool( + tool.func, + args_schema=tool.input, + ) for tool in my_agent_prompt.tools + ] +) +my_agent_output = my_agent_prompt.output + +# Finally, build your agent. +my_agent = langgraph.prebuilt.create_react_agent( + model=chat_model, + tools=my_agent_tools, + prompt=my_agent_prompt, + response_format=my_agent_output +) +Note + +For all constructor parameters, see the documentation for langgraph.prebuilt.ToolNode here. + +Parameters: +span (Span) + +class agentc_langgraph.agent.ReActAgent( +chat_model, +catalog, +span, +prompt_name=None +)[source] +A helper ReAct agent base class that integrates with Agent Catalog. + +Class Description +This class is meant to handle some of the boilerplate around using Agent Catalog with LangGraph's prebuilt ReAct agent. More specifically, this class performs the following: + +Fetches the prompt given the name (prompt_name) in the constructor and supplies the prompt and tools attached to the prompt to the ReAct agent constructor. + +Attaches a agentc_langchain.chat.Callback to the given chat_model to record all chat-model related activity (i.e., chat completions and tool calls). + +Wraps tools (if present in the prompt) in a agentc_langgraph.tool.ToolNode instance to record the results of tool calls. + +Wraps the invocation of this agent in a agentc.Span context manager. + +Below, we illustrate an example Agent Catalog prompt and an implementation of this class for our prompt. First, our prompt: + +record_kind: prompt +name: endpoint_finding_node +description: All inputs required to assemble the endpoint finding agent. + +output: + title: Endpoints + description: The source and destination airports for a flight / route. + type: object + properties: + source: + type: string + description: "The IATA code for the source airport." + dest: + type: string + description: "The IATA code for the destination airport." + required: [source, dest] + +content: + agent_instructions: > + Your task is to find the source and destination airports for a flight. + The user will provide you with the source and destination cities. + You need to find the IATA codes for the source and destination airports. + Another agent will use these IATA codes to find a route between the two airports. + If a route cannot be found, suggest alternate airports (preferring airports that are more likely to have + routes between them). + + output_format_instructions: > + Ensure that each IATA code is a string and is capitalized. 
+Next, the usage of this prompt in an implementation of this class:
+
+import langchain_core.messages
+import langchain_openai
+import agentc_langgraph.agent
+import agentc
+import typing
+
+class State(agentc_langgraph.agent.State):
+    endpoints: typing.Optional[dict]
+
+class EndpointFindingAgent(agentc_langgraph.agent.ReActAgent):
+    def __init__(self, catalog: agentc.Catalog, span: agentc.Span, **kwargs):
+        chat_model = langchain_openai.chat_models.ChatOpenAI(model="gpt-4o", temperature=0)
+        super().__init__(
+            chat_model=chat_model,
+            catalog=catalog,
+            span=span,
+            prompt_name="endpoint_finding_node",
+            **kwargs
+        )
+
+    def _invoke(self, span: agentc.Span, state: State, config) -> State:
+        # Give the working state to our agent.
+        agent = self.create_react_agent(span)
+        response = agent.invoke(input=state, config=config)
+
+        # 'source' and 'dest' come from the prompt's output format.
+        # Note this is a direct mutation on the "state" given to the Span!
+        structured_response = response["structured_response"]
+        state["endpoints"] = {"source": structured_response["source"], "destination": structured_response["dest"]}
+        state["messages"].append(response["messages"][-1])
+        return state
+
+if __name__ == '__main__':
+    catalog = agentc.Catalog()
+    span = catalog.Span(name="root_span")
+    my_agent = EndpointFindingAgent(catalog=catalog, span=span)
+Note
+
+For all constructor parameters, see the documentation for langgraph.prebuilt.create_react_agent here.
+
+Parameters:
+chat_model (BaseChatModel)
+
+catalog (Catalog)
+
+span (Span)
+
+prompt_name (str)
+
+async ainvoke(
+input,
+config=None,
+**kwargs
+)[source]
+Default implementation of ainvoke, which calls invoke from a thread.
+
+The default implementation allows usage of async code even if the Runnable did not implement a native async version of invoke.
+
+Subclasses should override this method if they can run asynchronously.
+
+Parameters:
+input (State)
+
+config (RunnableConfig | None)
+
+Return type:
+State | Command
+
+invoke(
+input,
+config=None,
+**kwargs
+)[source]
+Transform a single input into an output.
+
+Args:
+input: The input to the Runnable.
+config: A config to use when invoking the Runnable. The config supports standard keys like 'tags' and 'metadata' for tracing purposes, 'max_concurrency' for controlling how much work to do in parallel, and other keys. Please refer to the RunnableConfig for more details. Defaults to None.
+
+Returns:
+The output of the Runnable.
+
+Parameters:
+input (State)
+
+config (RunnableConfig | None)
+
+Return type:
+State | Command
+
+name: str | None
+The name of the Runnable. Used for debugging and tracing.
+
+class agentc_langgraph.agent.State[source]
+An (optional) state class for use with Agent Catalog's LangGraph helper classes.
+
+Class Description
+The primary use for this class is to help agentc_langgraph.agent.ReActAgent instances build agentc.span.EdgeContent logs. This class is essentially identical to the default state schema for LangGraph (i.e., messages and is_last_step) but with the inclusion of a new previous_node field.
+
+class agentc_langgraph.graph.GraphRunnable(
+*,
+catalog,
+span=None
+)[source]
+A helper class that wraps the "Runnable" interface with agentc.Span.
+
+Class Description
+This class is meant to handle some of the boilerplate around using agentc.Span instances and LangGraph compiled graphs. Specifically, this class builds a new span on instantiation and wraps all Runnable methods in a Span's context manager.
+
+Below, we illustrate an example implementation of this class for a two-agent system. 
+
+import langgraph.prebuilt
+import langgraph.graph
+import langchain_openai
+import langchain_core.messages
+import agentc_langgraph
+import agentc
+import typing
+
+class MyResearcherApp(agentc_langgraph.graph.GraphRunnable):
+    def search_web(self, search_string: str) -> str:
+        ...
+
+    def summarize_results(self, content: str) -> str:
+        ...
+
+    def compile(self):
+        research_agent = langgraph.prebuilt.create_react_agent(
+            model=langchain_openai.ChatOpenAI(model="gpt-4o"),
+            tools=[self.search_web]
+        )
+        summary_agent = langgraph.prebuilt.create_react_agent(
+            model=langchain_openai.ChatOpenAI(model="gpt-4o"),
+            tools=[self.summarize_results]
+        )
+        workflow = langgraph.graph.StateGraph(agentc_langgraph.graph.State)
+        workflow.add_node("research_agent", research_agent)
+        workflow.add_node("summary_agent", summary_agent)
+        workflow.add_edge("research_agent", "summary_agent")
+        workflow.add_edge("summary_agent", langgraph.graph.END)
+        workflow.set_entry_point("research_agent")
+        return workflow.compile()
+
+if __name__ == '__main__':
+    catalog = agentc.Catalog()
+    state = agentc_langgraph.graph.State(messages=[], is_last_step=False)
+    MyResearcherApp(catalog=catalog).invoke(input=state)
+Note
+
+For more information about LangGraph's (LangChain's) Runnable interface, see LangChain's documentation here.
+
+Tip
+
+The example above does not use tools and prompts managed by Agent Catalog. See agentc_langgraph.agent.ReActAgent for a helper class that handles some of the boilerplate around using LangGraph's prebuilt ReAct agent and Agent Catalog.
+
+Parameters:
+catalog (Catalog)
+
+span (Span)
+
+class agentc_langgraph.state.CheckpointSaver(
+options=None,
+*,
+serde=None,
+**kwargs
+)[source]
+Checkpoint saver class to persist LangGraph states in a Couchbase instance.
+
+Class Description
+Instances of this class are used by LangGraph (passed in at compile() time) to save checkpoints of agent state.
+
+Below, we give a minimal working example of how to use this class with LangGraph's prebuilt ReAct agent.
+
+import langchain_openai
+import langgraph.prebuilt
+import agentc_langgraph.state
+
+# Pass our checkpoint saver to the create_react_agent method.
+chat_model = langchain_openai.ChatOpenAI(name="gpt-4o")
+agent = langgraph.prebuilt.create_react_agent(
+    model=chat_model,
+    tools=list(),
+    checkpointer=agentc_langgraph.state.CheckpointSaver(create_if_not_exists=True)
+)
+config = {"configurable": {"thread_id": "1"}}
+agent.invoke({"messages": [("human", "Hello!")]}, config)
+To use this class with Agent Catalog's agentc_langgraph.graph.GraphRunnable class, pass the checkpoint saver to your workflow's compile() method (see the documentation for LangGraph's Graph.compile() method here for more information).
+
+import langgraph.prebuilt
+import langgraph.graph
+import langchain_openai
+import langchain_core.messages
+import agentc_langgraph
+import agentc
+import typing
+
+class MyResearcherApp(agentc_langgraph.graph.GraphRunnable):
+    def search_web(self, search_string: str) -> str:
+        ...
+
+    def summarize_results(self, content: str) -> str:
+        ...
+ + def compile(self): + research_agent = langgraph.prebuilt.create_react_agent( + model=langchain_openai.ChatOpenAI(model="gpt-4o"), + tools=[self.search_web] + ) + summary_agent = langgraph.prebuilt.create_react_agent( + model=langchain_openai.ChatOpenAI(model="gpt-4o"), + tools=[self.summarize_results] + ) + workflow = langgraph.graph.StateGraph(agentc_langgraph.graph.State) + workflow.add_node("research_agent", research_agent) + workflow.add_node("summary_agent", summary_agent) + workflow.add_edge("research_agent", "summary_agent") + workflow.add_edge("summary_agent", langgraph.graph.END) + workflow.set_entry_point("research_agent") + checkpointer = agentc_langgraph.state.CheckpointSaver(create_if_not_exists=True) + return workflow.compile(checkpointer=checkpointer) +Tip + +See here for more information about checkpoints in LangGraph. + +See also + +This class is a wrapper around the langgraph_checkpointer_couchbase.CouchbaseSaver class. See here for more information. + +Parameters: +options (CheckpointOptions) + +serde (SerializerProtocol) + +agentc_langgraph.state.initialize( +options=None, +**kwargs +)[source] +A function to create the collections required to use the checkpoint savers in this module. + +Function Description +This function is a helper function for creating the default collections (the thread and tuple collections) required for the CheckpointSaver and :py:class`AsyncCheckpointSaver` classes. Below, we give a minimal working example of how to use this function to create these collections. + +import langchain_openai +import langgraph.prebuilt +import agentc_langgraph.state + +# Initialize our collections. +agentc_langgraph.state.initialize() + +# Pass our checkpoint saver to the create_react_agent method. +chat_model = langchain_openai.ChatOpenAI(name="gpt-4o") +agent = langgraph.prebuilt.create_react_agent( + model=chat_model, + tools=list(), + checkpointer=CheckpointSaver() +) +config = {"configurable": {"thread_id": "1"}} +agent.invoke({"messages": [("human", "Hello there!")]}, config) +Parameters: +options (CheckpointOptions) -- The options to use when saving checkpoints to Couchbase. + +kwargs -- Keyword arguments to be forwarded to a CheckpointOptions constructor (ignored if options is present). + +Return type: +None + +pydantic settings agentc_langgraph.state.CheckpointOptions[source] +Config: +extra: str = allow + +env_prefix: str = AGENT_CATALOG_LANGGRAPH_CHECKPOINT_ + +env_file: str = .env + +Fields: +bucket (str | None) + +checkpoint_collection (str | None) + +conn_root_certificate (str | pathlib.Path | None) + +conn_string (str | None) + +create_if_not_exists (bool | None) + +ddl_retry_attempts (int | None) + +ddl_retry_wait_seconds (float | None) + +password (pydantic.types.SecretStr | None) + +scope (str | None) + +tuple_collection (str | None) + +username (str | None) + +field bucket: str | None = None +The name of the Couchbase bucket hosting the checkpoints. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +field checkpoint_collection: str | None = 'langgraph_checkpoint_thread' +The name of the Couchbase collection hosting the checkpoints threads. + +This field is optional and defaults to langgraph_checkpoint_thread. + +Validated by: +_pull_cluster_from_agent_catalog + +field conn_root_certificate: str | Path | None = None +Path to the root certificate file for the Couchbase cluster. + +This field is optional and only required if the Couchbase cluster is using a self-signed certificate. 
+ +Validated by: +_pull_cluster_from_agent_catalog + +field conn_string: str | None = None +The connection string to the Couchbase cluster hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +field create_if_not_exists: bool | None = False +Create the required collections if they do not exist. + +When raised (i.e., this value is set to True), the collections will be created if they do not exist. Lower this flag (set this to False) to instead raise an error if the collections do not exist. + +Validated by: +_pull_cluster_from_agent_catalog + +field ddl_retry_attempts: int | None = 3 +Maximum number of attempts to retry DDL operations. + +This value is only used on setup (i.e., the first time the checkpointer is requested). If the number of attempts is exceeded, the command will fail. By default, this value is 3 attempts. + +Validated by: +_pull_cluster_from_agent_catalog + +field ddl_retry_wait_seconds: float | None = 5 +Wait time (in seconds) between DDL operation retries. + +This value is only used on setup (i.e., the first time the checkpointer is requested). By default, this value is 5 seconds. + +Validated by: +_pull_cluster_from_agent_catalog + +field password: SecretStr | None = None +Password associated with the Couchbase instance hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +field scope: str | None = 'agent_activity' +The name of the Couchbase scope hosting the checkpoints. + +This field is optional and defaults to agent_activity. + +Validated by: +_pull_cluster_from_agent_catalog + +field tuple_collection: str | None = 'langgraph_checkpoint_tuple' +The name of the Couchbase collection hosting the checkpoints tuples. + +This field is optional and defaults to langgraph_checkpoint_tuple. + +Validated by: +_pull_cluster_from_agent_catalog + +field username: str | None = None +Username associated with the Couchbase instance hosting the cache. + +This field must be specified. + +Validated by: +_pull_cluster_from_agent_catalog + +LlamaIndex +class agentc_llamaindex.chat.Callback( +span, +event_starts_to_ignore=None, +event_ends_to_ignore=None +)[source] +All callback that will log all LlamaIndex events using the given span as the root. + +Class Description +This class is a callback handler that will log ChatCompletionContent, ToolCallContent, and ToolResultContent using events yielded from LlamaIndex (with the given span as the root). Below, we provide an example of how to use this class. 
+
+import agentc
+import llama_index.core.llms
+import llama_index.llms.openai
+
+catalog = agentc.Catalog()
+root_span = catalog.Span(name="root_span")
+my_prompt = catalog.find("prompt", name="talk_like_a_pirate")
+chat_model = llama_index.llms.openai.OpenAI(model="gpt-4o")
+chat_model.callback_manager.add_handler(Callback(span=root_span))
+result = chat_model.chat(
+    [
+        llama_index.core.llms.ChatMessage(role="system", content=my_prompt.content),
+        llama_index.core.llms.ChatMessage(role="user", content="What is your name?"),
+    ]
+)
+Parameters:
+span (Span)
+
+event_starts_to_ignore (list[CBEventType])
+
+event_ends_to_ignore (list[CBEventType])
\ No newline at end of file
diff --git a/docs/main-updates.md b/docs/main-updates.md
new file mode 100644
index 0000000..e69de29
diff --git a/notebooks/flight_search_agent_langraph/architecture.md b/notebooks/flight_search_agent_langraph/architecture.md
new file mode 100644
index 0000000..58c1af8
--- /dev/null
+++ b/notebooks/flight_search_agent_langraph/architecture.md
@@ -0,0 +1,347 @@
+# Flight Search Agent - Architecture Documentation
+
+## 🏗️ **Core Architecture Stack**
+
+- **Agent Catalog**: Tool management and orchestration framework
+- **Couchbase**: Vector database for airline reviews + NoSQL for bookings
+- **LangGraph**: Agent workflow orchestration with ReAct pattern
+- **OpenAI/Capella AI**: LLM backend with 4-tier priority system
+
+## 📊 **Data Layer Components**
+
+### **1. Airline Reviews (Vector Store)**
+
+- **Source**: Kaggle Indian Airlines Customer Reviews dataset (via kagglehub)
+- **Processing**: Converts reviews to structured text with airline, rating, title, content
+- **Storage**: Couchbase vector store with embeddings for semantic search
+- **Index**: Custom vector search index (`airline_reviews_index`)
+- **Usage**: Powers `search_airline_reviews` tool for customer feedback queries
+
+### **2. Flight Bookings (NoSQL)**
+
+- **Scope**: `agentc_bookings`
+- **Collection**: Daily collections (`user_bookings_YYYYMMDD`)
+- **Schema**: booking_id, airports, date, passengers, class, price, status
+- **Features**: Duplicate detection, automatic pricing, booking confirmations
+
+### **3. Flight Routes (External)**
+
+- **Source**: Couchbase `travel-sample` bucket (demo data)
+- **Data**: Routes with airline codes, aircraft types, airport pairs
+- **Usage**: Powers `lookup_flight_info` for flight availability
+
+## 🔧 **Agent Tools (4 Core Functions)**
+
+### **1. `lookup_flight_info`** - Flight Search
+
+- **Input**: source_airport, destination_airport (3-letter codes)
+- **Function**: Queries Couchbase travel-sample for available routes
+- **Output**: Formatted list of flights with airline codes and aircraft types
+- **Example**: "JFK,LAX" → Lists 8 airlines (AA, DL, UA, etc.) with equipment
+
+### **2. `save_flight_booking`** - Flight Booking
+
+- **Input**: Structured or natural language booking request
+- **Processing**: Parses airports, dates, passengers, class; validates inputs
+- **Features**: Duplicate detection, automatic pricing, booking ID generation
+- **Output**: Confirmation with booking ID and details
+- **Example**: Creates booking FL08061563CACD with full details
+
+### **3. 
`retrieve_flight_bookings`** - Booking Management + +- **Input**: Empty for all bookings, or "SOURCE,DEST,DATE" for specific +- **Function**: Queries daily booking collections with status filtering +- **Output**: Formatted list of current bookings with all details +- **Features**: Date-based collection partitioning, status management + +### **4. `search_airline_reviews`** - Customer Feedback + +- **Input**: Natural language query about airline services +- **Function**: Vector similarity search on embedded airline reviews +- **Output**: Top 5 relevant reviews with ratings and details +- **Example**: "SpiceJet service" β†’ Returns customer feedback with ratings + +## πŸ’­ **Agent Prompt System (ReAct Pattern)** + +### **Prompt Structure** (`flight_search_assistant.yaml`): + +- **Framework**: ReAct (Reason + Act) pattern with strict formatting +- **Task Classification**: Automatically identifies flight search, booking, retrieval, or review tasks +- **Tool Mapping**: Direct tool calls without intermediate extraction steps +- **Error Recovery**: Built-in fallback strategies and alternative approaches + +### **Key Behavior Rules**: + +1. **Immediate Tool Execution**: No intermediate steps - calls tools directly +2. **Format Compliance**: Strict ReAct format (Question β†’ Thought β†’ Action β†’ Observation β†’ Final Answer) +3. **Error Handling**: Robust input parsing with multiple fallback strategies +4. **Completion Focus**: Always completes user requests successfully + +### **Agent Flow**: + +``` +Query β†’ Task Classification β†’ Tool Selection β†’ Parameter Parsing β†’ Tool Execution β†’ Response Formatting +``` + +### **Input Handling Examples**: + +- "Find flights JFK to LAX" β†’ `lookup_flight_info(JFK, LAX)` +- "Book 2 business class LAX to JFK tomorrow" β†’ `save_flight_booking` with parsed details +- Natural language β†’ Structured parameters automatically + +## πŸ“ˆ **Evaluation Framework (Arize Phoenix)** + +### **Phoenix Observability**: + +- **Tracing**: Full LangGraph execution traces with tool calls +- **UI Dashboard**: Real-time monitoring at http://localhost:6006 +- **Instrumentation**: OpenTelemetry for LangChain + OpenAI integrations + +### **Evaluation Metrics** (4 Phoenix Evaluators): + +1. **Relevance**: Does response address the flight query? +2. **QA Correctness**: Is flight information accurate and helpful? +3. **Hallucination**: Does response contain fabricated information? +4. **Toxicity**: Is response harmful or inappropriate? + +### **Reference Answers**: + +Pre-defined expected outputs in `data/queries.py` for consistent evaluation + +### **Arize Dataset Integration**: + +- Automatic dataset creation from evaluation results +- Timestamped dataset names for version tracking +- Integration with Arize AI platform for production monitoring + +### **Test Queries** (Standard Evaluation Set): + +- Flight search: "Find flights from JFK to LAX" +- Booking: "Book a flight from LAX to JFK for tomorrow, 2 passengers, business class" +- Retrieval: "Show me my current flight bookings" +- Reviews: "What do passengers say about SpiceJet's service quality?" 
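+
+For reference, the evaluation set pairs each of the test queries above with an expected answer. A minimal sketch of how such a pairing can be structured is shown below; the variable and field names are illustrative assumptions, not the exact schema used in `data/queries.py`:
+
+```python
+# Hypothetical structure -- field names are illustrative only.
+TEST_QUERIES = [
+    {
+        "query": "Find flights from JFK to LAX",
+        "reference": "A list of available JFK-LAX routes with airline codes and aircraft types.",
+    },
+    {
+        "query": "Book a flight from LAX to JFK for tomorrow, 2 passengers, business class",
+        "reference": "A booking confirmation with a generated booking ID, route, date, passenger count, and price.",
+    },
+    {
+        "query": "Show me my current flight bookings",
+        "reference": "A formatted list of the user's stored bookings with their statuses.",
+    },
+    {
+        "query": "What do passengers say about SpiceJet's service quality?",
+        "reference": "A summary of the top matching airline reviews with ratings.",
+    },
+]
+```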
+ +## πŸ“Š **System Architecture Flowchart** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ FLIGHT SEARCH AGENT β”‚ +β”‚ (Agent Catalog + LangGraph) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ USER QUERY β”‚ +β”‚ ("Find flights JFK to LAX", "Book a flight") β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ LANGRAPH WORKFLOW ENGINE β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ FlightSearch β”‚ β”‚ ReAct Agent β”‚ β”‚ Agent Catalog β”‚ β”‚ +β”‚ β”‚ Graph State β”‚β†’ β”‚ (Reasoning) β”‚β†’ β”‚ Tool Discovery β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ TOOL SELECTION β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ lookup_flight_ β”‚ β”‚ save_flight_ β”‚ β”‚ retrieve_flight_ β”‚ β”‚ +β”‚ β”‚ info β”‚ β”‚ booking β”‚ β”‚ bookings β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ search_airline_reviews β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ 
+β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ COUCHBASE DATABASE β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ VECTOR STORE β”‚ β”‚ NoSQL STORE β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ β€’ Airline Reviews β”‚ β”‚ β€’ Flight Bookings β”‚ β”‚ +β”‚ β”‚ β€’ Vector Embeddings β”‚ β”‚ β€’ User Sessions β”‚ β”‚ +β”‚ β”‚ β€’ Similarity Search β”‚ β”‚ β€’ Daily Partitions β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ (Capella AI Embeddings) (SQL++ Queries) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ RESPONSE GENERATION β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ LLM BACKEND (4-TIER) β”‚ β”‚ +β”‚ β”‚ 1. Capella AI (Priority) 2. OpenAI 3. Fallback 4. Local β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ FORMATTED RESPONSE β”‚ +β”‚ (Flight listings, Booking confirmations, etc.) 
β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## πŸ”„ **Data Flow Workflow** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ FLIGHT SEARCH β”‚ β”‚ FLIGHT BOOKING β”‚ β”‚ REVIEW SEARCH β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ JFK β†’ LAX β”‚ β”‚ Book JFKβ†’MIA β”‚ β”‚ SpiceJet serviceβ”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚lookup_flight_ β”‚ β”‚save_flight_ β”‚ β”‚search_airline_ β”‚ +β”‚info() β”‚ β”‚booking() β”‚ β”‚reviews() β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ SQL++ Query β”‚ β”‚ Input Parsing β”‚ β”‚ Vector Search β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ travel-sample β”‚ β”‚ β€’ Airports β”‚ β”‚ β€’ Embeddings β”‚ +β”‚ .inventory.routeβ”‚ β”‚ β€’ Date parsing β”‚ β”‚ β€’ Similarity β”‚ +β”‚ β”‚ β”‚ β€’ Passenger cnt β”‚ β”‚ β€’ Top-K results β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ β”‚ + β–Ό β–Ό β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 8 Flight Routes β”‚ β”‚ Booking Record β”‚ β”‚ 5 Reviews β”‚ +β”‚ β€’ Airlines β”‚ β”‚ β€’ FL08061563... 
β”‚ β”‚ β€’ Ratings β”‚ +β”‚ β€’ Aircraft β”‚ β”‚ β€’ Confirmation β”‚ β”‚ β€’ Customer β”‚ +β”‚ β€’ Route info β”‚ β”‚ β€’ Price calc β”‚ β”‚ β€’ Experience β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## πŸ“Š **Evaluation Pipeline Flow** + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ EVALUATION PIPELINE β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Test Queries β”‚ β”‚ Agent Setup β”‚ β”‚ Phoenix Observability β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β€’ Flight search β”‚β†’ β”‚ β€’ Clear data β”‚β†’ β”‚ β€’ Launch UI (port 6006) β”‚ +β”‚ β€’ Booking β”‚ β”‚ β€’ Initialize β”‚ β”‚ β€’ OTEL instrumentation β”‚ +β”‚ β€’ Retrieval β”‚ β”‚ β€’ Load reviews β”‚ β”‚ β€’ Trace collection β”‚ +β”‚ β€’ Reviews β”‚ β”‚ β”‚ β”‚ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ PHOENIX EVALUATORS β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Relevance β”‚ β”‚ QA Correctness β”‚ β”‚ Hallucination β”‚ β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β”‚ Does response β”‚ β”‚ Is information β”‚ β”‚ Contains fabricated β”‚ β”‚ +β”‚ β”‚ address query? β”‚ β”‚ accurate? β”‚ β”‚ information? β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Toxicity β”‚ β”‚ +β”‚ β”‚ Is response harmful? 
β”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β–Ό +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Results DataFrameβ”‚β†’ β”‚ Arize Dataset β”‚β†’ β”‚ Performance Report β”‚ +β”‚ β”‚ β”‚ β”‚ β”‚ β”‚ +β”‚ β€’ Query/Responseβ”‚ β”‚ β€’ Timestamped β”‚ β”‚ β€’ Success rates β”‚ +β”‚ β€’ Eval scores β”‚ β”‚ β€’ Versioned β”‚ β”‚ β€’ Execution times β”‚ +β”‚ β€’ Explanations β”‚ β”‚ β€’ Exportable β”‚ β”‚ β€’ Quality metrics β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## πŸš€ **Key Technical Implementation Details** + +### **Agent Catalog Integration** + +- **Prompt-Embedded Tools**: Tools declared directly in prompt YAML for single source of truth +- **Automatic Tool Discovery**: `prompt_resource.tools` provides direct access to embedded tools +- **Prompt Management**: YAML-based prompt templates with dynamic content injection +- **Session Tracking**: Built-in observability and activity logging +- **Multi-framework Support**: Works with LangGraph, LangChain, and LlamaIndex + +### **Couchbase Integration** + +- **Dual Database Pattern**: Vector store for reviews + NoSQL for transactional data +- **Connection Pooling**: Shared cluster connections across tool modules +- **Automatic Setup**: Dynamic collection/scope creation with proper indexing +- **Data Partitioning**: Daily collections for bookings with cleanup strategies + +### **Tool Wrapper Architecture** + +- **Interface Translation**: Bridges Agent Catalog tools (structured parameters) with LangChain ReAct agent (single string inputs) +- **Input Sanitization**: Removes ReAct format artifacts (`\nObservation`, `Action:`, etc.) from tool inputs +- **Multi-format Parsing**: Supports various input formats: + - Key-value: `source_airport="JFK", destination_airport="LAX"` + - Comma-separated: `"JFK,LAX"` + - Natural language: `"JFK to LAX"` +- **Parameter Mapping**: Converts single strings to named function parameters +- **Error Handling**: Provides user-friendly error messages and graceful degradation + +**Why Tool Wrappers Are Needed**: +```python +# Agent Catalog tools expect: +lookup_flight_info(source_airport="JFK", destination_airport="LAX") + +# LangChain ReAct provides: +tool_input = "JFK to LAX\nObservation: ..." 
# Messy string + +# Wrapper converts: string β†’ structured parameters β†’ tool call +``` + +### **Error Handling & Robustness** + +- **Input Parsing**: Multiple fallback strategies for natural language processing +- **Connection Recovery**: Automatic reconnection and timeout handling +- **Validation Layers**: Airport codes, date formats, passenger counts +- **Graceful Degradation**: Meaningful error messages for users + +### **Performance Optimizations** + +- **Batch Processing**: Embeddings created in configurable batch sizes +- **Connection Reuse**: Global cluster instances prevent connection overhead +- **Caching Strategies**: Processed data caching in memory for repeated loads +- **Query Optimization**: Parameterized queries prevent SQL injection + +### **Production Considerations** + +- **Environment Configuration**: 12-factor app pattern with .env files +- **Logging Integration**: Structured logging with configurable levels +- **Monitoring Ready**: Phoenix traces + Arize dataset exports +- **Scalability**: Stateless design supports horizontal scaling + +## 🎯 **Interview Talking Points** + +### **Architecture Strengths**: + +1. **Modular Design**: Clean separation between tools, data, and orchestration +2. **Technology Integration**: Demonstrates modern AI stack (Agent Catalog + Couchbase + LangGraph) +3. **Production Ready**: Comprehensive error handling, monitoring, and evaluation +4. **Extensible**: Easy to add new tools or modify existing functionality + +### **Technical Depth**: + +1. **Vector Search**: Semantic search implementation with embedding strategies +2. **Database Design**: Multi-modal data storage (vector + NoSQL) patterns +3. **Agent Workflows**: ReAct pattern implementation with LangGraph state management +4. **Evaluation Framework**: Comprehensive testing with LLM-as-a-judge metrics + +### **Business Value**: + +1. **User Experience**: Natural language to structured operations +2. **Data Integration**: Real customer review data enhances booking decisions +3. **Observability**: Full traceability for debugging and optimization +4. **Scalability**: Architecture supports production deployment scenarios diff --git a/notebooks/flight_search_agent_langraph/data/airline_reviews_data.py b/notebooks/flight_search_agent_langraph/data/airline_reviews_data.py index 13bf3a9..bc2c989 100644 --- a/notebooks/flight_search_agent_langraph/data/airline_reviews_data.py +++ b/notebooks/flight_search_agent_langraph/data/airline_reviews_data.py @@ -14,7 +14,7 @@ from couchbase.options import ClusterOptions import dotenv import pandas as pd -from langchain_couchbase.vectorstores import CouchbaseVectorStore +from langchain_couchbase.vectorstores import CouchbaseSearchVectorStore from tqdm import tqdm # Import kagglehub only when needed to avoid import errors during indexing @@ -153,7 +153,7 @@ def load_to_vector_store( review_texts = self.process_to_texts() # Setup vector store for the target collection - vector_store = CouchbaseVectorStore( + vector_store = CouchbaseSearchVectorStore( cluster=cluster, bucket_name=bucket_name, scope_name=scope_name, diff --git a/notebooks/flight_search_agent_langraph/data/queries.py b/notebooks/flight_search_agent_langraph/data/queries.py index ba0eb32..1bbba69 100644 --- a/notebooks/flight_search_agent_langraph/data/queries.py +++ b/notebooks/flight_search_agent_langraph/data/queries.py @@ -28,9 +28,9 @@ # Query 2: Flight booking LAX to JFK for tomorrow, 2 passengers, business class """Flight Booking Confirmed! 
-Booking ID: FL08061563CACD +Booking ID: FL09251563CACD Route: LAX β†’ JFK -Departure Date: 2025-08-06 +Departure Date: 2025-09-25 Passengers: 2 Class: business Total Price: $1500.00 @@ -45,9 +45,9 @@ # Query 3: Flight booking JFK to MIA for next week """Flight Booking Confirmed! -Booking ID: FL08124E7B9C2A +Booking ID: FL10014E7B9C2A Route: JFK β†’ MIA -Departure Date: 2025-08-12 +Departure Date: 2025-10-01 Passengers: 1 Class: economy Total Price: $250.00 @@ -63,42 +63,42 @@ """Your Current Bookings (2 found): Booking 1: - Booking ID: FL08061563CACD + Booking ID: FL09251563CACD Route: LAX β†’ JFK - Date: 2025-08-06 + Date: 2025-09-25 Passengers: 2 Class: business Total: $1500.00 Status: confirmed - Booked: 2025-08-05 + Booked: 2025-09-24 Booking 2: - Booking ID: FL08124E7B9C2A + Booking ID: FL10014E7B9C2A Route: JFK β†’ MIA - Date: 2025-08-12 + Date: 2025-10-01 Passengers: 1 Class: economy Total: $250.00 Status: confirmed - Booked: 2025-08-05""", + Booked: 2025-09-24""", # Query 5: SpiceJet service quality reviews - """Found 5 relevant airline reviews for 'SpiceJet service': + """Found 5 relevant airline reviews for 'SpiceJet service quality': Review 1: -Airline: SpiceJet. Title: "Service is impeccable". Review: βœ… Trip Verified | Much better than airbus models. Even the basic economy class has ambient lighting. Better personal air vents and better spotlights. Even overhead storage bins are good. Service is impeccable with proper care taken of guests... +Airline: SpiceJet. Title: "Great travel experience". Review: βœ… Trip Verified | Marvelous courteous crew who took good care of all passengers. They should be rewarded for the patience shown towards the unruly ones. Great travel experience.. Rating: 10.0/10. Reviewer: Ranjita Pandey. Date: 18th April 2024. Recommended: yes Review 2: -Airline: SpiceJet. Title: "good service by the crew". Review: βœ… Trip Verified | I have had good service by the crew. It was amazing, the crew was very enthusiastic and warm welcome. It was one of the best services in my experience.. Rating: 10.0/10. Reviewer: K Mansour. Date: 10th August 2024. Recom... +Airline: SpiceJet. Title: "good service by the crew". Review: βœ… Trip Verified | I have had good service by the crew. It was amazing, the crew was very enthusiastic and warm welcome. It was one of the best services in my experience.. Rating: 10.0/10. Reviewer: K Mansour. Date: 10th August 2024. Recommended: yes Review 3: -Airline: SpiceJet. Title: "outstanding service I experienced". Review: Not Verified | I wanted to take a moment to express my sincere thanks for the outstanding service I experienced on my recent flight from Pune to Delhi. SG-8937. From the moment I boarded, the warmth and friendliness of the air h... +Airline: SpiceJet. Title: "delayed both ways by many hours". Review: Not Verified | Flight was delayed both ways by many hours. Poor service for the same price as other airlines like IndiGo. No wifi or other amenities to compensate for terrible service.. Rating: 2.0/10. Reviewer: Somil Jain Jain. Date: 20th May 2022. Recommended: no Review 4: -Airline: SpiceJet. Title: "efficient and warm onboard service". Review: βœ… Trip Verified | New Delhi to Kolkata. Delighted with the prompt, efficient and warm onboard service provided by the crew. Appreciate their efforts towards customer centricity.. Rating: 10.0/10. Reviewer: Debashis Roy. Date: 2... +Airline: SpiceJet. Title: "Excellent service". 
Review: βœ… Trip Verified | Excellent service by the ground staff courteous beyond expectations always willing to help in the real sense and not lipservice i will recommend to all whom I know. Rating: 10.0/10. Reviewer: Ramanathan Ramchandra. Date: 1st November 2023. Recommended: yes Review 5: -Airline: SpiceJet. Title: "Service is very good". Review: Service is very good, I am impressed with Miss Renu who gave the best services ever. Thanks to Renu who is very sweet by her nature as well as her service. Rating: 9.0/10. Reviewer: Sanjay Patnaik. Date: 21st September 2023. Recommended: ye...""", +Airline: SpiceJet. Title: "hospitality service given". Review: βœ… Trip Verified | Seats are comparable if compare to other budget airlines. Cabin crews are friendly and hospitality service given with smiles. Very happy and enjoy experience.. Rating: 8.0/10. Reviewer: A Bameen. Date: 20th March 2022. Recommended: yes""", ] # Create dictionary for backward compatibility diff --git a/notebooks/flight_search_agent_langraph/main.py b/notebooks/flight_search_agent_langraph/main.py index 1f08534..08dfe5a 100644 --- a/notebooks/flight_search_agent_langraph/main.py +++ b/notebooks/flight_search_agent_langraph/main.py @@ -24,9 +24,6 @@ from couchbase.cluster import Cluster from couchbase.exceptions import KeyspaceNotFoundException from couchbase.options import ClusterOptions -from langchain.agents import AgentExecutor, create_react_agent -from langchain_core.prompts import PromptTemplate -from langchain_core.tools import Tool from pydantic import SecretStr @@ -70,6 +67,9 @@ def find_project_root(): dotenv.load_dotenv(override=True) +# Agent Catalog tool integration - tools will be imported from tools/ directory + + class FlightSearchState(agentc_langgraph.agent.State): """State for flight search conversations - single user system.""" @@ -93,13 +93,33 @@ def __init__(self, catalog: agentc.Catalog, span: agentc.Span, chat_model=None): chat_model=chat_model, catalog=catalog, span=span, prompt_name="flight_search_assistant" ) + def _get_tool_names(self) -> set[str]: + """Get available tool names from agent metadata.""" + if hasattr(self, 'tools') and self.tools: + return {tool.name for tool in self.tools} + return set() + + def _extract_tool_results(self, messages): + """Extract tool results from messages for production display.""" + # Get available tool names dynamically + tool_names = self._get_tool_names() + + # Find the first successful ToolMessage (skip error results) + for message in messages: + if (hasattr(message, 'name') and + message.name in tool_names and + not message.content.startswith("Error:")): + return message.content + + return None + def _invoke( self, span: agentc.Span, state: FlightSearchState, config: langchain_core.runnables.RunnableConfig, ) -> FlightSearchState: - """Handle flight search conversation using ReActAgent.""" + """Handle flight search conversation using proper Agent Catalog patterns.""" # Initialize conversation if this is the first message if not state["messages"]: @@ -107,209 +127,35 @@ def _invoke( state["messages"].append(initial_msg) logger.info(f"Flight Query: {state['query']}") - # Get prompt resource first - we'll need it for the ReAct agent - prompt_resource = self.catalog.find("prompt", name="flight_search_assistant") - - # Get tools from Agent Catalog with simplified discovery - tools = [] - tool_names = [ - "lookup_flight_info", - "save_flight_booking", - "retrieve_flight_bookings", - "search_airline_reviews", - ] - - for tool_name in tool_names: - try: - # Find 
tool using Agent Catalog - catalog_tool = self.catalog.find("tool", name=tool_name) - if catalog_tool: - logger.info(f"βœ… Found tool: {tool_name}") - else: - logger.error(f"❌ Tool not found: {tool_name}") - continue - - except Exception as e: - logger.error(f"❌ Failed to find tool {tool_name}: {e}") - continue - - # Create wrapper function to handle proper parameter parsing - def create_tool_wrapper(original_tool, name): - """Create a wrapper for Agent Catalog tools with robust input handling.""" - - def wrapper_func(tool_input: str) -> str: - """Wrapper function that handles input parsing and error handling.""" - try: - logger.info(f"πŸ”§ Tool {name} called with raw input: {repr(tool_input)}") - - # Robust input sanitization to handle ReAct format artifacts - if isinstance(tool_input, str): - # Remove ReAct format artifacts that get mixed into input - clean_input = tool_input.strip() - - # Remove common ReAct artifacts - artifacts_to_remove = [ - '\nObservation', 'Observation', '\nThought:', 'Thought:', - '\nAction:', 'Action:', '\nAction Input:', 'Action Input:', - '\nFinal Answer:', 'Final Answer:' - ] - - for artifact in artifacts_to_remove: - if artifact in clean_input: - clean_input = clean_input.split(artifact)[0] - - # Clean up quotes and whitespace - clean_input = clean_input.strip().strip("\"'").strip() - # Normalize whitespace - clean_input = " ".join(clean_input.split()) - - tool_input = clean_input - - logger.info(f"🧹 Tool {name} cleaned input: {repr(tool_input)}") - - # Call appropriate tool with proper parameter handling - if name == "lookup_flight_info": - # Parse airport codes from input - import re - - source = None - dest = None - - # 1) Support key=value style inputs from ReAct (e.g., source_airport="JFK", destination_airport="LAX") - try: - m_src = re.search(r"source_airport\s*[:=]\s*\"?([A-Za-z]{3})\"?", tool_input, re.I) - m_dst = re.search(r"destination_airport\s*[:=]\s*\"?([A-Za-z]{3})\"?", tool_input, re.I) - if m_src and m_dst: - source = m_src.group(1).upper() - dest = m_dst.group(1).upper() - except Exception: - pass - - # 2) Fallback: comma separated codes (e.g., "JFK,LAX") - if source is None or dest is None: - if ',' in tool_input: - parts = tool_input.split(',') - if len(parts) >= 2: - source = parts[0].strip().upper() - dest = parts[1].strip().upper() - - # 3) Fallback: natural language (e.g., "JFK to LAX") - if source is None or dest is None: - words = tool_input.upper().split() - airport_codes = [w for w in words if len(w) == 3 and w.isalpha()] - if len(airport_codes) >= 2: - source, dest = airport_codes[0], airport_codes[1] - - if not source or not dest: - return "Error: Please provide source and destination airports (e.g., JFK,LAX or JFK to LAX)" - - result = original_tool.func(source_airport=source, destination_airport=dest) - - elif name == "save_flight_booking": - result = original_tool.func(booking_input=tool_input) - - elif name == "retrieve_flight_bookings": - # Handle empty input for "all bookings" - if not tool_input or tool_input.lower() in ["", "all", "none"]: - result = original_tool.func(booking_query="") - else: - result = original_tool.func(booking_query=tool_input) - - elif name == "search_airline_reviews": - if not tool_input: - return "Error: Please provide a search query for airline reviews" - result = original_tool.func(query=tool_input) - - else: - # Generic fallback - pass as first positional argument - result = original_tool.func(tool_input) - - logger.info(f"βœ… Tool {name} executed successfully") - return str(result) if result 
is not None else "No results found" - - except Exception as e: - error_msg = f"Error in tool {name}: {str(e)}" - logger.error(f"❌ {error_msg}") - return error_msg - - return wrapper_func - - # Create LangChain tool with descriptive information - tool_descriptions = { - "lookup_flight_info": "Find available flights between airports. Input: 'JFK,LAX' or 'JFK to LAX'. Returns flight options with airlines and aircraft.", - "save_flight_booking": "Create a flight booking. Input: 'JFK,LAX,2025-12-25' or natural language. Handles passenger count and class automatically.", - "retrieve_flight_bookings": "View existing bookings. Input: empty string for all bookings, or 'JFK,LAX,2025-12-25' for specific booking.", - "search_airline_reviews": "Search airline customer reviews. Input: 'SpiceJet service' or 'food quality'. Returns passenger reviews and ratings." - } - - langchain_tool = Tool( - name=tool_name, - description=tool_descriptions.get(tool_name, f"Tool for {tool_name.replace('_', ' ')}"), - func=create_tool_wrapper(catalog_tool, tool_name), - ) - tools.append(langchain_tool) - - # Use the Agent Catalog prompt content directly - get first result if it's a list - if isinstance(prompt_resource, list): - prompt_resource = prompt_resource[0] - - # Safely get the content from the prompt resource - prompt_content = getattr(prompt_resource, "content", "") - if not prompt_content: - prompt_content = "You are a helpful flight search assistant. Use the available tools to help users with their flight queries." - - # Inject current date into the prompt content - import datetime - - current_date = datetime.date.today().strftime("%Y-%m-%d") - prompt_content = prompt_content.replace("{current_date}", current_date) - - # Use the Agent Catalog prompt content directly - it already has ReAct format - react_prompt = PromptTemplate.from_template(str(prompt_content)) - - # Create ReAct agent with tools and prompt - agent = create_react_agent(self.chat_model, tools, react_prompt) - - # Custom parsing error handler - force stopping on parsing errors - def handle_parsing_errors(error): - """Custom handler for parsing errors - force early termination.""" - error_msg = str(error) - if "both a final answer and a parse-able action" in error_msg: - # Force early termination - return a reasonable response - return "Final Answer: I encountered a parsing error. Please reformulate your request." - elif "Missing 'Action:'" in error_msg: - return "I need to use the correct format with Action: and Action Input:" + # Use Agent Catalog's built-in create_react_agent method (like FastAPI example) + agent = self.create_react_agent(span) + + # Execute the agent with proper Agent Catalog integration + response = agent.invoke(input=state, config=config) + logger.info(f"πŸ”FULL Agent response: {response}") + + # Extract tool results instead of conversational responses for production display + if "messages" in response and response["messages"]: + # Find the first successful tool result + tool_content = self._extract_tool_results(response["messages"]) + if tool_content: + # Use tool results for production display + assistant_msg = langchain_core.messages.AIMessage(content=tool_content) + state["messages"].append(assistant_msg) + logger.info(f"πŸ“Š Tool results: {tool_content}") else: - return f"Final Answer: I encountered an error processing your request. Please try again." 
- - # Create agent executor - very strict: only 2 iterations max - agent_executor = AgentExecutor( - agent=agent, - tools=tools, - verbose=True, - handle_parsing_errors=handle_parsing_errors, - max_iterations=2, # STRICT: 1 tool call + 1 Final Answer only - early_stopping_method="force", # Force stop - return_intermediate_steps=True, - ) + # Fallback to last AI message if no tool results + last_message = response["messages"][-1] + state["messages"].append(last_message) + logger.info(f"πŸ“Š Agent response: {last_message.content}...") + else: + # Fallback to output field + output_content = response.get("output", "No response generated") + assistant_msg = langchain_core.messages.AIMessage(content=output_content) + state["messages"].append(assistant_msg) + logger.info(f"πŸ“Š Agent output: {output_content}...") - # Execute the agent - response = agent_executor.invoke({"input": state["query"]}) - - # Extract tool outputs from intermediate_steps and store in search_results - if "intermediate_steps" in response and response["intermediate_steps"]: - tool_outputs = [] - for step in response["intermediate_steps"]: - if isinstance(step, tuple) and len(step) >= 2: - # step[0] is the action, step[1] is the tool output/observation - tool_output = str(step[1]) - if tool_output and tool_output.strip(): - tool_outputs.append(tool_output) - state["search_results"] = tool_outputs - - # Add response to conversation - assistant_msg = langchain_core.messages.AIMessage(content=response["output"]) - state["messages"].append(assistant_msg) + # Mark as resolved state["resolved"] = True return state diff --git a/notebooks/flight_search_agent_langraph/prompts/flight_search_assistant.yaml b/notebooks/flight_search_agent_langraph/prompts/flight_search_assistant.yaml index 706ab53..c66fcf7 100644 --- a/notebooks/flight_search_agent_langraph/prompts/flight_search_assistant.yaml +++ b/notebooks/flight_search_agent_langraph/prompts/flight_search_assistant.yaml @@ -23,92 +23,31 @@ annotations: tools: - name: "lookup_flight_info" - name: "save_flight_booking" - - name: "retrieve_flight_bookings" + - name: "retrieve_flight_bookings" - name: "search_airline_reviews" # The main content of the prompt - single string format for compatibility content: > - You are a professional flight search assistant helping users with comprehensive flight operations. - Your goal is to complete each user request successfully and provide clear, helpful responses. + You are a helpful flight search assistant. I can help you with flight searches, bookings, viewing your current bookings, and finding airline reviews. - **FLIGHT SEARCH TASKS**: - When users ask to "find flights" or "search flights", call the `lookup_flight_info` tool immediately. - Use source_airport and destination_airport parameters with 3-letter airport codes. - Example: lookup_flight_info(source_airport="JFK", destination_airport="LAX") - Provide a complete, formatted list of all available flights with airline codes and aircraft types. + For each request, I'll use exactly one tool to get you the information you need. Here are some examples of what I can do: - **FLIGHT BOOKING TASKS**: - When users ask to "book a flight", call the `save_flight_booking` tool immediately with the provided details. - Use the route information (source/destination), passenger count, travel class, and dates directly from the user's request. 
- Example: save_flight_booking(source_airport="JFK", destination_airport="MIA", passengers=1, flight_class="economy", departure_date="2024-12-28") - Create the booking directly - do NOT look up flights first unless specifically asked to search first. - Provide the booking confirmation with booking ID and all details. + **Flight Searches:** + - "Find flights from JFK to LAX" β†’ I'll search for available flights between those airports - **BOOKING RETRIEVAL TASKS**: - When users ask to "show bookings", "get bookings", or "retrieve bookings", call the `retrieve_flight_bookings` tool. - Use empty string "" or "all" to get all bookings, or use "SOURCE,DEST,DATE" format for specific bookings. - Example: retrieve_flight_bookings("") for all bookings, or retrieve_flight_bookings("JFK,LAX,2024-12-25") for specific booking. - Format the results as a clear, organized list of current bookings with all relevant details. - If no bookings exist, inform the user politely that no bookings are found. + **Flight Bookings:** + - "Book a flight from LAX to JFK for tomorrow, 2 passengers, business class" β†’ I'll book exactly what you requested with 2 passengers in business class + - "Book an economy flight from JFK to MIA for next week, 1 passenger" β†’ I'll book 1 passenger in economy class for next week - **AIRLINE REVIEW TASKS**: - When users ask about airline reviews, feedback, or service quality, call the `search_airline_reviews` tool. - Use specific search terms like the airline name or service aspects as the query parameter. - Example: search_airline_reviews("SpiceJet food quality") or search_airline_reviews("Air India service") - If no results are found with the initial search, try alternative search terms like just the airline name or broader terms like "service" or "quality". + **Current Bookings:** + - "Show me my current flight bookings" β†’ I'll retrieve your existing bookings - **ERROR RECOVERY & COMPLETION**: - If a tool returns an error or empty results, acknowledge the issue and provide helpful guidance. - For failed searches, suggest alternative search terms and try different approaches. - For system errors, explain what went wrong in user-friendly terms and suggest next steps. - Always complete the user's request successfully - persist through errors and try alternative approaches. + **Airline Reviews:** + - "What do passengers say about SpiceJet's service quality?" β†’ I'll search for SpiceJet reviews - **MULTI-STEP WORKFLOWS**: - Some requests may require multiple tools or steps (e.g., search flights, then book one). - Complete each step thoroughly before moving to the next. - Always confirm completion of each major action (booking created, flights found, reviews located, etc.). + **Important:** I'll call one tool per request and stop immediately after getting your results. For bookings, I'll carefully extract the passenger count and class from your request (like "2 passengers, business class"). - **RESPONSE FORMATTING**: - Always provide complete, professional responses that fulfill the user's request. - Format flight information clearly with airline codes, aircraft types, and route details. - Include booking confirmations with all relevant details (booking ID, route, passengers, class, cost). - Present search results in an organized, easy-to-read format. - When tasks are completed successfully, clearly state what was accomplished. - If errors occur, explain them clearly and provide next steps or alternatives. 
- Be helpful and tutorial-friendly for users learning Agent Catalog and Couchbase. - - You have access to the following tools: - {tools} - - **IMPORTANT**: Always use tools directly without trying to create intermediate steps or extract information separately. - Never use actions like "Extract route information" - instead, call the appropriate tool immediately with the parameters. - - **CRITICAL**: Follow the ReAct format EXACTLY. Never mix Action and Final Answer in the same response. - When you use an Action, wait for the Observation before providing Final Answer. - - Use the following format for your responses: - - Question: the input question you must answer - Thought: I need to [identify the task type and which tool to use] - Action: the action to take, should be one of [{tool_names}] - Action Input: [the exact parameters for the tool] - Observation: the result of the action - ... (this Thought/Action/Action Input/Observation can repeat N times as needed) - Thought: I now know the final answer - Final Answer: the final answer to the original input question - - **FORMAT RULES**: - - NEVER include Final Answer in the same response as Action - - Wait for Observation after each Action before continuing - - Only provide Final Answer after you have all the information you need - - **TOOL USAGE RULES**: - - lookup_flight_info: Always use exactly 2 parameters: source_airport, destination_airport - - save_flight_booking: Use all required parameters directly from user request - - retrieve_flight_bookings: Use empty string "" for all bookings, or "SOURCE,DEST,DATE" for specific - - search_airline_reviews: Use descriptive query about what the user wants to know - - Today: {current_date} + Today's date: 2025-09-24 (tomorrow = 2025-09-25, next week = 2025-10-01) Question: {input} Thought:{agent_scratchpad} \ No newline at end of file diff --git a/notebooks/flight_search_agent_langraph/tools/retrieve_flight_bookings.py b/notebooks/flight_search_agent_langraph/tools/retrieve_flight_bookings.py index 8dac911..ff68c6b 100644 --- a/notebooks/flight_search_agent_langraph/tools/retrieve_flight_bookings.py +++ b/notebooks/flight_search_agent_langraph/tools/retrieve_flight_bookings.py @@ -38,9 +38,13 @@ def parse_booking_query(booking_query: str) -> dict: """Parse booking query input and return search parameters.""" - if not booking_query or booking_query.strip().lower() in ["", "all", "none"]: + # More robust empty input detection + if (not booking_query or + booking_query.strip() == "" or + booking_query.strip().lower() in ["all", "none", "show all", "get all"] or + len(booking_query.strip()) <= 2): # Handle single chars, colons, etc. return {"type": "all"} - + parts = booking_query.strip().split(",") if len(parts) != 3: raise ValueError("For specific booking search, use format 'source_airport,destination_airport,date'. Example: 'JFK,LAX,2024-12-25'. Or use empty string for all bookings.") diff --git a/notebooks/flight_search_agent_langraph/tools/save_flight_booking.py b/notebooks/flight_search_agent_langraph/tools/save_flight_booking.py index 76665fa..8fc0c55 100644 --- a/notebooks/flight_search_agent_langraph/tools/save_flight_booking.py +++ b/notebooks/flight_search_agent_langraph/tools/save_flight_booking.py @@ -170,20 +170,20 @@ def parse_and_validate_date(departure_date: str) -> tuple[datetime.date, str]: else: # Validate date format if not re.match(r"^\d{4}-\d{2}-\d{2}$", departure_date): - raise ValueError("Date must be in YYYY-MM-DD format. 
Example: 2024-12-25") + raise ValueError("Date must be in YYYY-MM-DD format. Example: 2025-12-25") dep_date = datetime.datetime.strptime(departure_date, "%Y-%m-%d").date() - - # Check if date is in the future (allow today for demo purposes) + # Allow bookings for today and future dates if dep_date < datetime.date.today(): today = datetime.date.today().strftime('%Y-%m-%d') - tomorrow = (datetime.date.today() + datetime.timedelta(days=1)).strftime('%Y-%m-%d') - raise ValueError(f"Departure date must be in the future. Today is {today}. Please use a date like {tomorrow}") - + raise ValueError(f"Departure date cannot be in the past. Today is {today}. Please use today's date or later.") + + # Add logging for debugging + logger.info(f"πŸ—“οΈ Date validation: dep_date={dep_date}, today={datetime.date.today()}, comparison={dep_date < datetime.date.today()}") return dep_date, departure_date - + except ValueError as e: if "time data" in str(e): - raise ValueError("Invalid date format. Please use YYYY-MM-DD format. Example: 2024-12-25") + raise ValueError("Invalid date format. Please use YYYY-MM-DD format. Example: 2025-12-25") raise @@ -228,11 +228,18 @@ def parse_passenger_details(original_input: str) -> tuple[int, str]: passengers = int(number_match.group(1)) # Parse class - runs independently of passenger parsing - if re.search(r'\bflight_class\s*[:=]\s*\"?business\"?', original_input, re.I) or re.search(r'\bbusiness\b', original_input, re.I): + # Enhanced patterns to catch "business class", "2 passengers, business class" etc. + if (re.search(r'\bflight_class\s*[:=]\s*["\']?business["\']?', original_input, re.I) or + re.search(r'\bbusiness\s*class\b', original_input, re.I) or + re.search(r'\bbusiness\b', original_input, re.I)): flight_class = "business" - elif re.search(r'\bflight_class\s*[:=]\s*\"?first\"?', original_input, re.I) or re.search(r'\bfirst\b', original_input, re.I): + elif (re.search(r'\bflight_class\s*[:=]\s*["\']?first["\']?', original_input, re.I) or + re.search(r'\bfirst\s*class\b', original_input, re.I) or + re.search(r'\bfirst\b', original_input, re.I)): flight_class = "first" - elif re.search(r'\bflight_class\s*[:=]\s*\"?economy\"?', original_input, re.I) or re.search(r'\beconomy\b|\bbasic\b', original_input, re.I): + elif (re.search(r'\bflight_class\s*[:=]\s*["\']?economy["\']?', original_input, re.I) or + re.search(r'\beconomy\s*class\b', original_input, re.I) or + re.search(r'\beconomy\b|\bbasic\b', original_input, re.I)): flight_class = "economy" return passengers, flight_class @@ -331,58 +338,72 @@ def format_booking_confirmation(booking_data: dict) -> str: @agentc.catalog.tool -def save_flight_booking(booking_input: str) -> str: +def save_flight_booking(source_airport: str, destination_airport: str, departure_date: str, + passengers: int = 1, flight_class: str = "economy") -> str: """ Save a flight booking to Couchbase database. - Input format: "source_airport,destination_airport,date" - Example: "JFK,LAX,2024-12-25" - - - source_airport: 3-letter airport code (e.g. JFK) - - destination_airport: 3-letter airport code (e.g. LAX) - - date: YYYY-MM-DD format + Args: + source_airport: 3-letter airport code (e.g. JFK) + destination_airport: 3-letter airport code (e.g. LAX) + departure_date: Date in YYYY-MM-DD format + passengers: Number of passengers (1-10, default: 1) + flight_class: Flight class - economy, business, or first (default: economy) - Checks for duplicate bookings before creating new ones. 
+ Returns: + Booking confirmation message with booking ID and details """ try: + # Log parameters to debug flight_class extraction + logger.info(f"🎯 Booking parameters: source={source_airport}, dest={destination_airport}, date={departure_date}, passengers={passengers}, flight_class={flight_class}") + # Validate database connection if cluster is None: return "Database connection unavailable. Unable to save booking. Please try again later." - - # Parse and validate input - structured_input, original_input, _, _ = parse_booking_input(booking_input) - source_airport, destination_airport, departure_date = validate_booking_parts(structured_input) + + # Validate inputs with proper type checking source_airport, destination_airport = validate_airport_codes(source_airport, destination_airport) + + # Validate passenger count + if not isinstance(passengers, int) or passengers < 1 or passengers > 10: + return "Error: Number of passengers must be between 1 and 10" + + # Validate flight class + valid_classes = ["economy", "business", "first"] + if flight_class.lower() not in valid_classes: + return f"Error: Flight class must be one of: {', '.join(valid_classes)}" + flight_class = flight_class.lower() + + # Parse and validate date dep_date, departure_date = parse_and_validate_date(departure_date) - + # Setup database collection bucket_name = os.getenv("CB_BUCKET", "travel-sample") scope_name = "agentc_bookings" collection_name = f"user_bookings_{datetime.date.today().strftime('%Y%m%d')}" _ensure_collection_exists(bucket_name, scope_name, collection_name) - + # Check for duplicates duplicate_error = check_duplicate_booking( source_airport, destination_airport, departure_date, bucket_name, scope_name, collection_name) if duplicate_error: return duplicate_error - - # Parse passenger details and calculate pricing - passengers, flight_class = parse_passenger_details(original_input) + + # Calculate pricing total_price = calculate_price(flight_class, passengers) - + # Create and save booking booking_id = f"FL{dep_date.strftime('%m%d')}{str(uuid.uuid4())[:8].upper()}" booking_data = create_booking_record( booking_id, source_airport, destination_airport, departure_date, passengers, flight_class, total_price) save_booking_to_db(booking_data, bucket_name, scope_name, collection_name) - + return format_booking_confirmation(booking_data) - + except ValueError as e: return f"Error: {str(e)}" except Exception as e: logger.exception(f"Booking processing error: {e}") - return "Booking could not be processed. Please try again with format: 'source_airport,destination_airport,date' (e.g., 'JFK,LAX,2024-12-25')" + return f"Booking could not be processed: {str(e)}" diff --git a/notebooks/flight_search_agent_langraph/tools/search_airline_reviews.py b/notebooks/flight_search_agent_langraph/tools/search_airline_reviews.py index c018d77..fb8c2bd 100644 --- a/notebooks/flight_search_agent_langraph/tools/search_airline_reviews.py +++ b/notebooks/flight_search_agent_langraph/tools/search_airline_reviews.py @@ -87,9 +87,7 @@ def format_review_results(results: list, query: str) -> str: if meta_parts: metadata_info = f"[{' | '.join(meta_parts)}]\n" - # Limit content length for readability - if len(content) > 300: - content = content[:300] + "..." 
+ # Show full content for comprehensive reviews (removed 300 character limit) formatted_results.append(f"Review {i}:\n{metadata_info}{content}") diff --git a/notebooks/hotel_search_agent_langchain/main.py b/notebooks/hotel_search_agent_langchain/main.py index 27c2d65..5ee7424 100644 --- a/notebooks/hotel_search_agent_langchain/main.py +++ b/notebooks/hotel_search_agent_langchain/main.py @@ -84,14 +84,9 @@ def setup_embeddings_service(input_type="query"): return embeddings -def setup_llm_service(application_span=None): +def setup_llm_service(): """Setup LLM service using Priority 1 (OpenAI wrappers + Capella).""" - callbacks = ( - [agentc_langchain.chat.Callback(span=application_span)] - if application_span - else None - ) - _, llm = setup_ai_services(framework="langchain", callbacks=callbacks) + _, llm = setup_ai_services(framework="langchain") return llm @@ -157,30 +152,36 @@ def setup_hotel_support_agent(): data_loader_func=load_hotel_data_to_couchbase, ) - # Setup LLM with priority order - llm = setup_llm_service(application_span) - - # Load tools and create agent - tool_search = catalog.find("tool", name="search_vector_database") - if not tool_search: - raise ValueError( - "Could not find search_vector_database tool. Make sure it's indexed with 'agentc index tools/'" - ) - - tools = [ - Tool( - name=tool_search.meta.name, - description=tool_search.meta.description, - func=tool_search.func, - ), - ] + # Setup LLM + llm = setup_llm_service() + # Get prompt and tools from Agent Catalog hotel_prompt = catalog.find("prompt", name="hotel_search_assistant") if not hotel_prompt: raise ValueError( "Could not find hotel_search_assistant prompt in catalog. Make sure it's indexed with 'agentc index prompts/'" ) + # Convert Agent Catalog tools to LangChain tools + tools = [] + for catalog_tool in hotel_prompt.tools: + langchain_tool = Tool( + name=catalog_tool.meta.name, + description=catalog_tool.meta.description, + func=catalog_tool.func, + ) + tools.append(langchain_tool) + + # Add Agent Catalog callback for proper logging integration + callback = agentc_langchain.chat.Callback( + span=application_span, + tools=tools, + output=hotel_prompt.output + ) + if llm.callbacks is None: + llm.callbacks = [] + llm.callbacks.append(callback) + custom_prompt = PromptTemplate( template=hotel_prompt.content.strip(), input_variables=["input", "agent_scratchpad"], diff --git a/notebooks/landmark_search_agent_llamaindex/main.py b/notebooks/landmark_search_agent_llamaindex/main.py index 8c5e658..57814ee 100644 --- a/notebooks/landmark_search_agent_llamaindex/main.py +++ b/notebooks/landmark_search_agent_llamaindex/main.py @@ -34,6 +34,7 @@ def find_project_root(): # Now import agentc and other modules after path is set import agentc +import agentc_llamaindex.chat from shared.agent_setup import setup_ai_services, setup_environment, test_capella_connectivity from shared.couchbase_client import create_couchbase_client @@ -121,39 +122,30 @@ def setup_environment(): logger.info(f" Index: {os.environ['CB_INDEX']}") -def create_llamaindex_agent(catalog, span): - """Create LlamaIndex ReAct agent with landmark search tool from Agent Catalog.""" +def create_llamaindex_agent(catalog): + """Create LlamaIndex ReAct agent with tools from Agent Catalog prompt.""" try: from llama_index.core.agent import ReActAgent from llama_index.core.tools import FunctionTool - # Get tools from Agent Catalog - tools = [] - - # Search landmarks tool - search_tool_result = catalog.find("tool", name="search_landmarks") - if search_tool_result: - 
tools.append( - FunctionTool.from_defaults( - fn=search_tool_result.func, - name="search_landmarks", - description=getattr(search_tool_result.meta, "description", None) - or "Search for landmark information using semantic vector search. Use for finding attractions, monuments, museums, parks, and other points of interest.", - ) - ) - logger.info("Loaded search_landmarks tool from AgentC") - - if not tools: - logger.warning("No tools found in Agent Catalog") - else: - logger.info(f"Loaded {len(tools)} tools from Agent Catalog") - - # Get prompt from Agent Catalog - REQUIRED, no fallbacks + # Get prompt and tools from Agent Catalog prompt_result = catalog.find("prompt", name="landmark_search_assistant") if not prompt_result: raise RuntimeError("Prompt 'landmark_search_assistant' not found in Agent Catalog") - # Try different possible attributes for the prompt content + # Convert Agent Catalog tools to LlamaIndex tools + tools = [] + for catalog_tool in prompt_result.tools: + llamaindex_tool = FunctionTool.from_defaults( + fn=catalog_tool.func, + name=catalog_tool.meta.name, + description=catalog_tool.meta.description, + ) + tools.append(llamaindex_tool) + + logger.info(f"Loaded {len(tools)} tools from Agent Catalog prompt") + + # Get prompt content system_prompt = ( getattr(prompt_result, "content", None) or getattr(prompt_result, "template", None) @@ -161,18 +153,18 @@ def create_llamaindex_agent(catalog, span): ) if not system_prompt: raise RuntimeError( - "Could not access prompt content from AgentC - prompt content is None or empty" + "Could not access prompt content from Agent Catalog - prompt content is None or empty" ) logger.info("Loaded system prompt from Agent Catalog") - # Create ReAct agent with reasonable iteration limit + # Create ReAct agent using the correct API agent = ReActAgent.from_tools( tools=tools, llm=Settings.llm, - verbose=True, # Keep on for debugging - system_prompt=system_prompt, - max_iterations=4, # Allow one tool call and finalization without warnings + max_iterations=4, + verbose=True, + context=system_prompt, # system prompt goes in 'context' parameter ) logger.info("LlamaIndex ReAct agent created successfully") @@ -195,7 +187,14 @@ def setup_landmark_agent(): client.connect() # Setup LLM and embeddings using shared module - embeddings, llm = setup_ai_services(framework="llamaindex", temperature=0.1, application_span=span) + embeddings, llm = setup_ai_services(framework="llamaindex", temperature=0.1) + + # Add Agent Catalog callback for proper logging integration + callback = agentc_llamaindex.chat.Callback(span=span) + if llm.callback_manager is not None: + llm.callback_manager.add_handler(callback) + else: + logger.warning("LLM callback_manager is None, cannot add Agent Catalog callback") # Set global LlamaIndex settings Settings.llm = llm @@ -231,7 +230,7 @@ def setup_landmark_agent(): ) # Create LlamaIndex ReAct agent - agent = create_llamaindex_agent(catalog, span) + agent = create_llamaindex_agent(catalog) return agent, client diff --git a/shared/agent_setup.py b/shared/agent_setup.py index 8e6b027..3981dbe 100644 --- a/shared/agent_setup.py +++ b/shared/agent_setup.py @@ -371,8 +371,8 @@ def setup_environment(): "CB_SCOPE": "agentc_data", "CB_COLLECTION": "hotel_data", "CB_INDEX": "hotel_data_index", - "CAPELLA_API_EMBEDDING_MODEL": "nvidia/nv-embedqa-e5-v5", - "CAPELLA_API_LLM_MODEL": "meta-llama/Llama-3.1-8B-Instruct", + "CAPELLA_API_EMBEDDING_MODEL": "nvidia/llama-3.2-nv-embedqa-1b-v2", + "CAPELLA_API_LLM_MODEL": "meta/llama-3.1-8b-instruct", 
"CAPELLA_API_EMBEDDING_MAX_TOKENS": "512", "NVIDIA_API_EMBEDDING_MODEL": "nvidia/nv-embedqa-e5-v5", "NVIDIA_API_LLM_MODEL": "meta/llama-3.1-70b-instruct",