From 0cf00c912cdd99914b4797cf194bd36468ba6877 Mon Sep 17 00:00:00 2001
From: Miroslav Mihaylov
Date: Tue, 3 Sep 2024 20:11:19 -0500
Subject: [PATCH] Set indexing_key var from upstream listener

---
 content/templates/retrieval_augmented_generation.md | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/content/templates/retrieval_augmented_generation.md b/content/templates/retrieval_augmented_generation.md
index 1d56a466..c61bac6e 100644
--- a/content/templates/retrieval_augmented_generation.md
+++ b/content/templates/retrieval_augmented_generation.md
@@ -79,7 +79,7 @@ won't be necessary.
 
     CHUNK_SIZE = 200
 
-    @model(flatten=True, model_update_kwargs={})
+    @model(flatten=True, model_update_kwargs={'document_embedded': False})
     def chunker(text):
         text = text.split()
         chunks = [' '.join(text[i:i + CHUNK_SIZE]) for i in range(0, len(text), CHUNK_SIZE)]
@@ -94,7 +94,7 @@ won't be necessary.
 
     CHUNK_SIZE = 500
 
-    @model(flatten=True)
+    @model(flatten=True, model_update_kwargs={'document_embedded': False})
     def chunker(pdf_file):
         elements = partition_pdf(pdf_file)
         text = '\n'.join([e.text for e in elements])
@@ -125,6 +125,10 @@ features, or chunking your data.
 You can use this query to operate on those outputs.
 :::
 
+```python
+indexing_key = upstream_listener.outputs
+indexing_key
+```
 
 ## Build text embedding model
 
@@ -192,7 +196,7 @@ vector_index = \
 ## Create Vector Search Model
 
 ```python
-item = {'_outputs__chunker': ''}
+item = {indexing_key: ''}
 ```
 
 ```python
@@ -202,7 +206,7 @@ vector_search_model = QueryModel(
     identifier="VectorSearch",
     select=db[upstream_listener.outputs].like(item, vector_index=vector_index_name, n=5).select(),
     # The _source is the identifier of the upstream data, which can be used to locate the data from upstream sources using `_source`.
-    postprocess=lambda docs: [{"text": doc['_outputs__chunker'], "_source": doc["_source"]} for doc in docs],
+    postprocess=lambda docs: [{"text": doc[indexing_key], "_source": doc["_source"]} for doc in docs],
     db=db
 )
 ```
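For reference, a minimal standalone sketch of the pattern this patch introduces. It is plain Python with a hypothetical stub class standing in for the template's `upstream_listener` (the real object comes from the template's setup and requires a database); the point is that the output key is derived once from the listener instead of being hard-coded as `'_outputs__chunker'`:

```python
# Hypothetical stub: stands in for the template's upstream chunking
# listener, whose `.outputs` attribute names the column under which
# the chunker's outputs are stored.
class StubListener:
    outputs = '_outputs__chunker'

upstream_listener = StubListener()

# Single source of truth for the key, as in the patch.
indexing_key = upstream_listener.outputs

# Reused in the vector-search query template ...
item = {indexing_key: ''}

# ... and in postprocessing of the returned documents.
docs = [{indexing_key: 'some chunk text', '_source': 'doc-1'}]
results = [{'text': d[indexing_key], '_source': d['_source']} for d in docs]
print(results)  # [{'text': 'some chunk text', '_source': 'doc-1'}]
```

With this, renaming or swapping the chunker listener updates `indexing_key` automatically, so the query `item` and the `postprocess` lambda cannot drift out of sync with the upstream output column.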