@@ -26,8 +26,10 @@
 )
 from llama_stack.apis.inference import (
     Inference,
+    OpenAIMessageParam,
     OpenAISystemMessageParam,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.log import get_logger
@@ -57,12 +59,14 @@ def __init__(
         tool_runtime_api: ToolRuntime,
         responses_store: ResponsesStore,
         vector_io_api: VectorIO,  # VectorIO
+        prompts_api: Prompts,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.responses_store = responses_store
         self.vector_io_api = vector_io_api
+        self.prompts_api = prompts_api
         self.tool_executor = ToolExecutor(
             tool_groups_api=tool_groups_api,
             tool_runtime_api=tool_runtime_api,
@@ -97,6 +101,22 @@ async def _prepend_instructions(self, messages, instructions):
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))

+    async def _prepend_prompt(self, messages: list[OpenAIMessageParam], prompt_id: str | None) -> str | None:
+        if not prompt_id:
+            return None
+
+        try:
+            # Check whether the prompt exists in Llama Stack and retrieve it
+            prompt = await self.prompts_api.get_prompt(prompt_id)
+            if prompt and prompt.prompt:
+                messages.insert(0, OpenAISystemMessageParam(content=prompt.prompt))
+                logger.info(f"Prompt {prompt_id} found")
+                return prompt_id
+        except ValueError:
+            # Prompt not found; skip prepending
+            logger.warning(f"Prompt {prompt_id} not found, skipping prompt prepending")
+            return None
+
     async def get_openai_response(
         self,
         response_id: str,
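For reviewers, here is a minimal sketch of the behavior `_prepend_prompt` is going for, using a stubbed prompts API. `FakePromptsAPI` and the local `Prompt` dataclass are illustrative stand-ins, not llama_stack classes; the only assumptions carried over from the diff are that `get_prompt` is awaitable, returns an object with a `prompt` field, and raises `ValueError` for unknown ids:

```python
import asyncio
from dataclasses import dataclass


@dataclass
class Prompt:
    prompt: str  # stand-in; mirrors the `prompt.prompt` access in the diff


class FakePromptsAPI:
    """Illustrative stub, not the real llama_stack Prompts API."""

    def __init__(self, prompts: dict[str, Prompt]):
        self._prompts = prompts

    async def get_prompt(self, prompt_id: str) -> Prompt:
        if prompt_id not in self._prompts:
            raise ValueError(f"Prompt {prompt_id} not found")
        return self._prompts[prompt_id]


async def main():
    api = FakePromptsAPI({"pmpt_123": Prompt(prompt="You are a terse assistant.")})
    messages = [{"role": "user", "content": "hi"}]
    try:
        prompt = await api.get_prompt("pmpt_123")
        # Same move as _prepend_prompt: the stored prompt becomes the leading system message
        messages.insert(0, {"role": "system", "content": prompt.prompt})
    except ValueError:
        pass  # unknown id: leave messages untouched, as the diff does
    print(messages)


asyncio.run(main())
```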
@@ -171,6 +191,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt_id: str | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
@@ -187,6 +208,7 @@ async def create_openai_response(
         stream_gen = self._create_streaming_response(
             input=input,
             model=model,
+            prompt_id=prompt_id,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,
@@ -215,6 +237,7 @@ async def _create_streaming_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt_id: str | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
@@ -226,6 +249,9 @@ async def _create_streaming_response(
         # Input preprocessing
         input = await self._prepend_previous_response(input, previous_response_id)
         messages = await convert_response_input_to_chat_messages(input)
+
+        # Prepend reusable prompt (if provided)
+        prompt_id = await self._prepend_prompt(messages, prompt_id)
         await self._prepend_instructions(messages, instructions)

         # Structured outputs
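One ordering detail worth flagging: `_prepend_prompt` runs first and both helpers insert at index 0, so any `instructions` end up ahead of the stored prompt in the final message list. A tiny illustration with stand-in strings:

```python
messages = ["user input"]            # stand-in for the converted chat messages
messages.insert(0, "stored prompt")  # what _prepend_prompt does
messages.insert(0, "instructions")   # what _prepend_instructions does next
assert messages == ["instructions", "stored prompt", "user input"]
```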
@@ -248,6 +274,7 @@ async def _create_streaming_response(
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt_id=prompt_id,
             text=text,
             max_infer_iters=max_infer_iters,
             tool_executor=self.tool_executor,
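And a hypothetical call site for the new parameter; the wrapper below is illustrative (the `responses_impl` handle, model id, and prompt id are assumptions), but the keyword follows the signature in this diff:

```python
async def demo(responses_impl):
    # prompt_id refers to a prompt stored via the Prompts API; if found, it is
    # prepended to the conversation as a system message before inference runs.
    return await responses_impl.create_openai_response(
        input="Summarize our launch plan.",
        model="llama3.2:3b",      # assumed model id
        prompt_id="pmpt_abc123",  # assumed stored-prompt id
    )
```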