UiPath · radugheo · Oct 1, 2025
diff --git a/samples/context-grounding-retriever-agent/.python-version b/samples/context-grounding-retriever-agent/.python-version
@@ -0,0 +1 @@
+3.10
diff --git a/samples/context-grounding-retriever-agent/README.md b/samples/context-grounding-retriever-agent/README.md
@@ -0,0 +1,117 @@
+# Context Grounding Retriever Agent
+
+A news aggregator agent that uses LLMs and Context Grounding to fetch, index, and query the latest news articles. This agent demonstrates integration with UiPath Context Grounding, web search (Tavily), and LlamaIndex query engines.
+
+## Description
+
+The Context Grounding Retriever Agent is a multi-step workflow that:
+
+- **Extracts topics**: Uses LLM to identify the main topic from user queries
+- **Checks data freshness**: Verifies if indexed data is recent (within 24 hours)
+- **Searches the web**: Fetches latest news using Tavily API when data is stale
+- **Indexes content**: Adds new articles to UiPath Context Grounding index
+- **Queries intelligently**: Uses ReActAgent with context grounding to answer questions
+
+This agent combines LLM reasoning with RAG (Retrieval Augmented Generation) to provide up-to-date, grounded responses.
+
+## Project Initialization
+
+### Prerequisites
+- Python 3.10 or higher
+- UiPath CLI installed
+- UV package manager (optional but recommended)
+- Tavily API key (for web search)
+- UiPath Context Grounding index named "News-Index"
+
+### Initialize the Project
+
+1. Navigate to the project directory:
+```bash
+cd context-grounding-retriever-agent
+```
+
+2. Install dependencies using UV (recommended):
+```bash
+uv sync
+```
+
+Or using pip:
+```bash
+pip install -r requirements.txt
+```
+
+3. Set up environment variables:
+```bash
+export TAVILY_API_KEY="your_tavily_api_key"
+```
+
+## Running Locally
+
+```bash
+# setup environment first, then run:
+uipath init
+uipath run agent --input-file input.json
+```
+
+## UiPath Setup
+
+### 1. Authenticate with UiPath
+
+```bash
+uipath auth
+```
+
+Follow the prompts to authenticate with your UiPath account.
+
+### 2. Create Context Grounding Index
+
+Create an index named "News-Index" in your UiPath Orchestrator under the "Shared" folder.
+
+You can use the steps described in [this sample](https://github.com/UiPath/uipath-langchain-python/tree/main/samples/RAG-quiz-generator) as guidance.
+
+### 3. Pack and Publish the Agent
+
+```bash
+uipath pack
+uipath publish
+```
+
+This creates a package file that is then published to UiPath Orchestrator.
+
+## Input Schema
+
+The agent accepts a query as input:
+
+```json
+{
+  "query": "What's the latest news about Tesla?"
+}
+```
+
+### Output Schema
+
+The agent returns a natural language response based on indexed news articles:
+
+```json
+{
+  "result": "string (LLM-generated answer with citations)"
+}
+```
+
+## Configuration
+
+Key configuration values in `main.py`:
+
+- `INDEX_NAME = "News-Index"` - Context Grounding index name
+- `FOLDER_PATH = "Shared"` - Orchestrator folder path
+- `FRESHNESS_HOURS = 24` - Data freshness threshold in hours
+
+## Workflow Steps
+
+1. **Extract Topic**: LLM extracts the main topic from the user query
+2. **Check Freshness**: Verifies if indexed data is less than 24 hours old
+3. **Search Web** (if stale): Fetches latest news using Tavily API
+4. **Add to Index** (if stale): Ingests new articles into Context Grounding
+5. **Wait for Ingestion**: Polls until indexing completes
+6. **Query Index**: Uses ReActAgent to answer the query using indexed data
+
diff --git a/samples/context-grounding-retriever-agent/input.json b/samples/context-grounding-retriever-agent/input.json
@@ -0,0 +1,3 @@
+{
+  "query": "What's the latest news about UiPath?"
+}
diff --git a/samples/context-grounding-retriever-agent/llama_index.json b/samples/context-grounding-retriever-agent/llama_index.json
@@ -0,0 +1,7 @@
+{
+  "dependencies": ["."],
+  "workflows": {
+    "agent": "main.py:agent"
+  },
+  "env": ".env"
+}
diff --git a/samples/context-grounding-retriever-agent/main.py b/samples/context-grounding-retriever-agent/main.py
@@ -0,0 +1,214 @@
+import time
+from datetime import datetime, timedelta
+
+from langchain_community.tools.tavily_search import TavilySearchResults
+from llama_index.core import get_response_synthesizer
+from llama_index.core.agent import ReActAgent
+from llama_index.core.response_synthesizers.type import ResponseMode
+from llama_index.core.tools import QueryEngineTool, ToolMetadata
+from llama_index.core.workflow import (
+    Context,
+    Event,
+    StartEvent,
+    StopEvent,
+    Workflow,
+    step,
+)
+from uipath import UiPath
+
+from uipath_llamaindex.llms import UiPathOpenAI
+from uipath_llamaindex.query_engines import ContextGroundingQueryEngine
+
+INDEX_NAME = "News-Index"
+FOLDER_PATH = "Shared"
+FRESHNESS_HOURS = 24
+
+uipath = UiPath()
+tavily_tool = TavilySearchResults(max_results=5)
+
+
+class CheckIndexEvent(Event):
+    topic: str
+
+
+class SearchWebEvent(Event):
+    topic: str
+
+
+class AddToIndexEvent(Event):
+    topic: str
+    web_results: str
+
+
+class WaitForIngestionEvent(Event):
+    pass
+
+
+class QueryIndexEvent(Event):
+    topic: str
+
+
+async def check_index_freshness(topic: str) -> bool:
+    try:
+        index = await uipath.context_grounding.retrieve_async(
+            INDEX_NAME, folder_path=FOLDER_PATH
+        )
+
+        if index.last_ingested is None:
+            print("Index has never been ingested")
+            return False
+
+        last_ingested_time = index.last_ingested
+        current_time = datetime.now(last_ingested_time.tzinfo)
+        time_diff = current_time - last_ingested_time
+
+        print(f"Last ingested: {last_ingested_time}")
+        print(f"Time since last ingestion: {time_diff}")
+
+        if time_diff < timedelta(hours=FRESHNESS_HOURS):
+            print(f"Data is fresh (less than {FRESHNESS_HOURS} hours old)")
+            return True
+        else:
+            print(f"Data is stale (more than {FRESHNESS_HOURS} hours old)")
+            return False
+    except Exception:
+        return False
+
+
+async def in_progress_ingestion() -> bool:
+    try:
+        index = await uipath.context_grounding.retrieve_async(
+            INDEX_NAME, folder_path=FOLDER_PATH
+        )
+        status = index.last_ingestion_status
+        return status in ["Queued", "InProgress", "Running"]
+    except Exception:
+        return False
+
+
+class NewsAggregatorWorkflow(Workflow):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self.llm = UiPathOpenAI(model="gpt-4o-2024-11-20")
+
+    @step
+    async def start(self, ctx: Context, ev: StartEvent) -> CheckIndexEvent:
+        query = ev.get("query", "")
+
+        if not query:
+            return StopEvent(result="No query provided")
+
+        await ctx.store.set("original_query", query)
+
+        topic_response = await self.llm.acomplete(
+            f"Extract the main topic/subject from this query. Return only the topic name, nothing else: {query}"
+        )
+        topic = str(topic_response).strip()
+        await ctx.store.set("topic", topic)
+
+        return CheckIndexEvent(topic=topic)
+
+    @step
+    async def check_index(
+        self, ctx: Context, ev: CheckIndexEvent
+    ) -> SearchWebEvent | QueryIndexEvent:
+        has_fresh_data = await check_index_freshness(ev.topic)
+
+        if has_fresh_data:
+            print(f"Found fresh data about {ev.topic} in index")
+            return QueryIndexEvent(topic=ev.topic)
+        else:
+            print(f"No fresh data found, searching web for {ev.topic}")
+            return SearchWebEvent(topic=ev.topic)
+
+    @step
+    async def search_web(self, ctx: Context, ev: SearchWebEvent) -> AddToIndexEvent:
+        print(f"Searching web for: {ev.topic}")
+
+        results = tavily_tool.invoke({"query": f"latest news about {ev.topic}"})
+
+        formatted_results = (
+            f"News about {ev.topic} (Retrieved: {datetime.now().isoformat()})\n\n"
+        )
+        for i, result in enumerate(results, 1):
+            formatted_results += f"{i}. {result.get('content', '')}\n"
+            formatted_results += f"   Source: {result.get('url', 'N/A')}\n\n"
+
+        print(f"Found {len(results)} results")
+        return AddToIndexEvent(topic=ev.topic, web_results=formatted_results)
+
+    @step
+    async def add_to_index(
+        self, ctx: Context, ev: AddToIndexEvent
+    ) -> WaitForIngestionEvent:
+        timestamp = int(time.time())
+        file_name_response = await self.llm.acomplete(
+            f"""Generate a file name from this topic, replacing spaces with underscores.
+            For instance, 'Tesla news' should be 'tesla_news'.
+            Topic: {ev.topic}
+            Return only the filename without extension."""
+        )
+        file_name = str(file_name_response).strip().replace(" ", "_")
+
+        print(f"Adding data to index with filename: {file_name}-{timestamp}.txt")
+        await uipath.context_grounding.add_to_index_async(
+            name=INDEX_NAME,
+            blob_file_path=f"{file_name}-{timestamp}.txt",
+            content_type="application/txt",
+            content=ev.web_results,
+            folder_path=FOLDER_PATH,
+        )
+
+        return WaitForIngestionEvent()
+
+    @step
+    async def wait_for_ingestion(
+        self, ctx: Context, ev: WaitForIngestionEvent
+    ) -> QueryIndexEvent | StopEvent:
+        no_of_tries = 10
+        wait_seconds = 5
+
+        while no_of_tries > 0:
+            if not await in_progress_ingestion():
+                print("Ingestion complete!")
+                topic = await ctx.store.get("topic")
+                return QueryIndexEvent(topic=topic)
+
+            no_of_tries -= 1
+            print(f"Waiting for ingestion... Retrying {no_of_tries} more time(s)")
+            time.sleep(wait_seconds)
+
+        return StopEvent(
+            result="Index ingestion took too long. Please try again later."
+        )
+
+    @step
+    async def query_index(self, ctx: Context, ev: QueryIndexEvent) -> StopEvent:
+        print(f"Querying index for: {ev.topic}")
+
+        response_synthesizer = get_response_synthesizer(
+            response_mode=ResponseMode.SIMPLE_SUMMARIZE, llm=self.llm
+        )
+
+        query_engine = ContextGroundingQueryEngine(
+            index_name=INDEX_NAME,
+            folder_path=FOLDER_PATH,
+            response_synthesizer=response_synthesizer,
+        )
+
+        tool = QueryEngineTool(
+            query_engine=query_engine,
+            metadata=ToolMetadata(
+                name="news_search",
+                description=f"Search through indexed news articles about {ev.topic}",
+            ),
+        )
+
+        agent = ReActAgent(tools=[tool], llm=self.llm, verbose=True)
+        original_query = await ctx.store.get("original_query")
+        response = await agent.run(user_msg=original_query)
+
+        return StopEvent(result=str(response))
+
+
+agent = NewsAggregatorWorkflow(timeout=180, verbose=True)
diff --git a/samples/context-grounding-retriever-agent/pyproject.toml b/samples/context-grounding-retriever-agent/pyproject.toml
@@ -0,0 +1,12 @@
+[project]
+name = "context-grounding-retriever-agent"
+version = "0.0.4"
+description = "UiPath LLamaIndex agent for context grounding indexes"
+authors = [{ name = "John Doe", email = "[email protected]" }]
+readme = { file = "README.md", content-type = "text/markdown" }
+requires-python = ">=3.10"
+dependencies = [
+    "langchain-community>=0.3.30",
+    "llama-index>=0.14.3",
+    "uipath-llamaindex>=0.0.36",
+]
diff --git a/samples/context-grounding-retriever-agent/uipath.json b/samples/context-grounding-retriever-agent/uipath.json
@@ -0,0 +1,17 @@
+{
+    "entryPoints": [
+        {
+            "filePath": "agent",
+            "uniqueId": "5991be55-ab39-4c46-acfb-3e5cdb5d40d9",
+            "type": "agent",
+            "input": {},
+            "output": {
+                "type": "object"
+            }
+        }
+    ],
+    "bindings": {
+        "version": "2.0",
+        "resources": []
+    }
+}