fix null output parser of search index tool

ylwu-amzn · ylwu-amzn · commit f6480387c869 · 2025-09-18T00:09:30.000-07:00
Signed-off-by: Yaliang Wu <ylwu@amazon.com>
diff --git a/docs/tutorials/agent_framework/rag/agentic_rag_bedrock_openai_oss.md b/docs/tutorials/agent_framework/rag/agentic_rag_bedrock_openai_oss.md
@@ -3,6 +3,9 @@
 ## 1.1 LLM
 
 ### 1.1.1 Create LLM
+
+- `reasoning_effort`: "low", "medium", "high"
+
 ```
 POST _plugins/_ml/models/_register
 {
@@ -19,6 +22,7 @@ POST _plugins/_ml/models/_register
       "service_name": "bedrock",
       "model": "openai.gpt-oss-120b-1:0",
       "return_data_as_map": true,
+      "reasoning_effort": "high",
       "output_processors": [
         {
           "type": "conditional",
@@ -62,7 +66,7 @@ POST _plugins/_ml/models/_register
         "headers": {
           "content-type": "application/json"
         },
-        "request_body": "{ \"system\": [{\"text\": \"${parameters.system_prompt}\"}], \"messages\": [${parameters._chat_history:-}{\"role\":\"user\",\"content\":[{\"text\":\"${parameters.prompt}\"}]}${parameters._interactions:-}]${parameters.tool_configs:-} }"
+        "request_body": "{ \"additionalModelRequestFields\": {\"reasoning_effort\": \"${parameters.reasoning_effort}\"}, \"system\": [{\"text\": \"${parameters.system_prompt}\"}], \"messages\": [${parameters._chat_history:-}{\"role\":\"user\",\"content\":[{\"text\":\"${parameters.prompt}\"}]}${parameters._interactions:-}]${parameters.tool_configs:-} }"
       }
     ],
     "client_config": {
@@ -377,7 +381,7 @@ POST /_plugins/_ml/agents/_register
       "parameters": {
         "model_id": "afArmJgBCqG4iVqlioA9",
         "embedding_model_id": "WvAemJgBCqG4iVqlaYAS",
-        "system_prompt": "You are an OpenSearch query generator that converts natural language questions into precise OpenSearch Query DSL JSON. Your ONLY response should be the valid JSON DSL query without any explanations or additional text.\n\nFollow these rules:\n1. Analyze the index mapping and sample document first, use the exact field name, DON'T use non-existing field name in generated query DSL\n2. Analyze the question to identify search criteria, filters, sorting, and result limits\n3. Extract specific parameters (fields, values, operators, size) mentioned in the question\n4. Apply the most appropriate query type (match, match_all, term, range, bool, etc.)\n5. Return ONLY the JSON query DSL with proper formatting\n\nNEURAL SEARCH GUIDANCE:\n1. OpenSearch KNN index can be identified index settings with `\"knn\": \"true\",`; or in index mapping with any field with `\"type\": \"knn_vector\"`\n2. If search KNN index, prefer to use OpenSearch neural search query which is semantic based search, and has better accuracy.\n3. OpenSearch neural search needs embedding model id, please always use this model id \"${parameters.embedding_model_id}\"\n4. In KNN indices, embedding fields follow the pattern: {text_field_name}_embedding. For example, the raw text input is \"description\", then the generated embedding for this field will be saved into KNN field \"description_embedding\". \n5. Always exclude embedding fields from search results as they contain vector arrays that clutter responses\n6. Embedding fields can be identified in index mapping with \"type\": \"knn_vector\"\n7. OpenSearch neural search query will use embedding field (knn_vector type) and embedding model id. 
\n\nNEURAL SEARCH QUERY CONSTRUCTION:\nWhen constructing neural search queries, follow this pattern:\n{\n  \"_source\": {\n    \"excludes\": [\n      \"{field_name}_embedding\"\n    ]\n  },\n  \"query\": {\n    \"neural\": {\n      \"{field_name}_embedding\": {\n        \"query_text\": \"your query here\",\n        \"model_id\": \"${parameters.embedding_model_id}\"\n      }\n    }\n  }\n}\n\nRESPONSE GUIDELINES:\n1. Don't return the reasoning process, just return the generated OpenSearch query.\n2. Don't wrap the generated OpenSearch query with ```json and ```\n\nExamples:\n\nQuestion: retrieve 5 documents from index test_data\n{\"query\":{\"match_all\":{}},\"size\":5}\n\nQuestion: find documents where the field title contains machine learning\n{\"query\":{\"match\":{\"title\":\"machine learning\"}}}\n\nQuestion: search for documents with the phrase artificial intelligence in the content field and return top 10 results\n{\"query\":{\"match_phrase\":{\"content\":\"artificial intelligence\"}},\"size\":10}\n\nQuestion: get documents where price is greater than 100 and category is electronics\n{\"query\":{\"bool\":{\"must\":[{\"range\":{\"price\":{\"gt\":100}}},{\"term\":{\"category\":\"electronics\"}}]}}}\n\nQuestion: find the average rating of products in the electronics category\n{\"query\":{\"term\":{\"category\":\"electronics\"}},\"aggs\":{\"avg_rating\":{\"avg\":{\"field\":\"rating\"}}},\"size\":0}\n\nQuestion: return documents sorted by date in descending order, limit to 15 results\n{\"query\":{\"match_all\":{}},\"sort\":[{\"date\":{\"order\":\"desc\"}}],\"size\":15}\n\nQuestion: which book has the introduction of AWS AgentCore\n{\"_source\":{\"excludes\":[\"book_content_embedding\"]},\"query\":{\"neural\":{\"book_content_embedding\":{\"query_text\":\"which book has the introduction of AWS AgentCore\"}}}}\n\nQuestion: how many books published in 2024\n{\"query\": {\"term\": {\"publication_year\": 2024}},\"size\": 0,\"track_total_hits\": true}\n",
+        "system_prompt": "You are an OpenSearch query generator that converts natural language questions into precise OpenSearch Query DSL JSON. Your ONLY response should be the valid JSON DSL query without any explanations or additional text.\n\nFollow these rules:\n1. Analyze the index mapping and sample document first, use the exact field name, DON'T use non-existing field name in generated query DSL\n2. Analyze the question to identify search criteria, filters, sorting, and result limits\n3. Extract specific parameters (fields, values, operators, size) mentioned in the question\n4. Apply the most appropriate query type (match, match_all, term, range, bool, etc.)\n5. Return ONLY the JSON query DSL with proper formatting.\n6. Please use standard two-letter ISO 3166-1 alpha-2 country codes (such as CN for China, US for United States, GB for United Kingdom) when build opensearch query.\n\nNEURAL SEARCH GUIDANCE:\n1. OpenSearch KNN index can be identified index settings with `\"knn\": \"true\",`; or in index mapping with any field with `\"type\": \"knn_vector\"`\n2. If search KNN index, prefer to use OpenSearch neural search query which is semantic based search, and has better accuracy.\n3. OpenSearch neural search needs embedding model id, please always use this model id \"${parameters.embedding_model_id}\"\n4. In KNN indices, embedding fields follow the pattern: {text_field_name}_embedding. For example, the raw text input is \"description\", then the generated embedding for this field will be saved into KNN field \"description_embedding\". \n5. Always exclude embedding fields from search results as they contain vector arrays that clutter responses\n6. Embedding fields can be identified in index mapping with \"type\": \"knn_vector\"\n7. OpenSearch neural search query will use embedding field (knn_vector type) and embedding model id. 
\n\nNEURAL SEARCH QUERY CONSTRUCTION:\nWhen constructing neural search queries, follow this pattern:\n{\n  \"_source\": {\n    \"excludes\": [\n      \"{field_name}_embedding\"\n    ]\n  },\n  \"query\": {\n    \"neural\": {\n      \"{field_name}_embedding\": {\n        \"query_text\": \"your query here\",\n        \"model_id\": \"${parameters.embedding_model_id}\"\n      }\n    }\n  }\n}\n\nRESPONSE GUIDELINES:\n1. Don't return the reasoning process, just return the generated OpenSearch query.\n2. Don't wrap the generated OpenSearch query with ```json and ```\n\nExamples:\n\nQuestion: retrieve 5 documents from index test_data\n{\"query\":{\"match_all\":{}},\"size\":5}\n\nQuestion: find documents where the field title contains machine learning\n{\"query\":{\"match\":{\"title\":\"machine learning\"}}}\n\nQuestion: search for documents with the phrase artificial intelligence in the content field and return top 10 results\n{\"query\":{\"match_phrase\":{\"content\":\"artificial intelligence\"}},\"size\":10}\n\nQuestion: get documents where price is greater than 100 and category is electronics\n{\"query\":{\"bool\":{\"must\":[{\"range\":{\"price\":{\"gt\":100}}},{\"term\":{\"category\":\"electronics\"}}]}}}\n\nQuestion: find the average rating of products in the electronics category\n{\"query\":{\"term\":{\"category\":\"electronics\"}},\"aggs\":{\"avg_rating\":{\"avg\":{\"field\":\"rating\"}}},\"size\":0}\n\nQuestion: return documents sorted by date in descending order, limit to 15 results\n{\"query\":{\"match_all\":{}},\"sort\":[{\"date\":{\"order\":\"desc\"}}],\"size\":15}\n\nQuestion: which book has the introduction of AWS AgentCore\n{\"_source\":{\"excludes\":[\"book_content_embedding\"]},\"query\":{\"neural\":{\"book_content_embedding\":{\"query_text\":\"which book has the introduction of AWS AgentCore\"}}}}\n\nQuestion: how many books published in 2024\n{\"query\": {\"term\": {\"publication_year\": 2024}},\"size\": 0,\"track_total_hits\": true}\n",
         "prompt": "The index mappoing of ${parameters.index_name}:\n${parameters.IndexMappingTool.output:-}\n\nThe sample documents of ${parameters.index_name}:\n${parameters.SearchIndexTool.output:-}\n\nPlease generate the OpenSearch query dsl for the question:\n${parameters.question}",
         "response_filter": "$.output.message.content[1].text",
         "output_processors": [
diff --git a/ml-algorithms/src/main/java/org/opensearch/ml/engine/tools/SearchIndexTool.java b/ml-algorithms/src/main/java/org/opensearch/ml/engine/tools/SearchIndexTool.java
@@ -217,7 +217,11 @@ public <T> void run(Map<String, String> originalParameters, ActionListener<T> li
                     tensors.add(ModelTensor.builder().name(name).dataAsMap(convertSearchResponseToMap(r)).build());
                     outputs.add(ModelTensors.builder().mlModelTensors(tensors).build());
                     ModelTensorOutput output = ModelTensorOutput.builder().mlModelOutputs(outputs).build();
-                    listener.onResponse((T) outputParser.parse(output));
+                    if (outputParser != null) {
+                        listener.onResponse((T) outputParser.parse(output));
+                    } else {
+                        listener.onResponse((T) output);
+                    }
                     return;
                 }
                 if (hits != null && hits.length > 0) {
@@ -227,7 +231,11 @@ public <T> void run(Map<String, String> originalParameters, ActionListener<T> li
                         String doc = GSON.toJson(docContent);
                         contextBuilder.append(doc).append("\n");
                     }
-                    listener.onResponse((T) outputParser.parse(contextBuilder.toString()));
+                    if (outputParser != null) {
+                        listener.onResponse((T) outputParser.parse(contextBuilder.toString()));
+                    } else {
+                        listener.onResponse((T) contextBuilder.toString());
+                    }
                 } else {
                     listener.onResponse((T) "");
                 }