
Commit ef753bc

feat: add Prompts API to Responses API
1 parent 8422bd1 commit ef753bc

10 files changed, +138 -2 lines changed

docs/_static/llama-stack-spec.html

Lines changed: 12 additions & 0 deletions
@@ -8962,6 +8962,10 @@
         "type": "string",
         "description": "The underlying LLM used for completions."
       },
+      "prompt_id": {
+        "type": "string",
+        "description": "Prompt ID that refers to system prompt created by user and reused in current response."
+      },
       "instructions": {
         "type": "string"
       },
@@ -9066,6 +9070,10 @@
         "type": "string",
         "description": "(Optional) ID of the previous response in a conversation"
       },
+      "prompt_id": {
+        "type": "string",
+        "description": "(Optional) Reusable prompt created by user"
+      },
       "status": {
         "type": "string",
         "description": "Current status of the response generation"
@@ -13277,6 +13285,10 @@
         "type": "string",
         "description": "(Optional) ID of the previous response in a conversation"
       },
+      "prompt_id": {
+        "type": "string",
+        "description": "(Optional) Reusable prompt created by user"
+      },
       "status": {
         "type": "string",
         "description": "Current status of the response generation"

docs/_static/llama-stack-spec.yaml

Lines changed: 13 additions & 0 deletions
@@ -6509,6 +6509,11 @@ components:
       model:
         type: string
         description: The underlying LLM used for completions.
+      prompt_id:
+        type: string
+        description: >-
+          Prompt ID that refers to system prompt created by user and reused in current
+          response.
       instructions:
         type: string
       previous_response_id:
@@ -6598,6 +6603,10 @@ components:
         type: string
         description: >-
           (Optional) ID of the previous response in a conversation
+      prompt_id:
+        type: string
+        description: >-
+          (Optional) Reusable prompt created by user
       status:
         type: string
         description: >-
@@ -9862,6 +9871,10 @@ components:
         type: string
         description: >-
           (Optional) ID of the previous response in a conversation
+      prompt_id:
+        type: string
+        description: >-
+          (Optional) Reusable prompt created by user
       status:
         type: string
         description: >-
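
Note: put together, the schema additions above correspond to a request body along these lines. This is an illustrative sketch only; the field names come from the spec diff, while the concrete values are borrowed from the new unit test at the bottom of this commit.

# Hypothetical Responses API request payload using the new "prompt_id" field.
# Values are illustrative and are not defined anywhere in this commit's docs.
payload = {
    "model": "meta-llama/Llama-3.1-8B-Instruct",
    "input": "What is the capital of Ireland?",
    "prompt_id": "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef",
}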

llama_stack/apis/agents/agents.py

Lines changed: 2 additions & 1 deletion
@@ -699,6 +699,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt_id: str | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
@@ -710,9 +711,9 @@
         max_infer_iters: int | None = 10,  # this is an extension to the OpenAI API
     ) -> OpenAIResponseObject | AsyncIterator[OpenAIResponseObjectStream]:
         """Create a new OpenAI response.
-
         :param input: Input message(s) to create the response.
         :param model: The underlying LLM used for completions.
+        :param prompt_id: Prompt ID that refers to system prompt created by user and reused in current response.
         :param previous_response_id: (Optional) if specified, the new response will be a continuation of the previous response. This can be used to easily fork-off new responses from existing responses.
         :param include: (Optional) Additional fields to include in the response.
         :returns: An OpenAIResponseObject.
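
Note: a minimal sketch of how a caller might exercise the new parameter once an Agents implementation is in hand; `agents_api` and the ID values are placeholders, not names defined by this commit.

# Hedged usage sketch (not part of the diff): reuse a stored prompt by ID.
response = await agents_api.create_openai_response(
    input="What is the capital of Ireland?",
    model="meta-llama/Llama-3.1-8B-Instruct",
    prompt_id="pmpt_1234567890abcdef1234567890abcdef1234567890abcdef",
)
print(response.prompt_id)  # echoed back on the resulting OpenAIResponseObject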

llama_stack/apis/agents/openai_responses.py

Lines changed: 2 additions & 0 deletions
@@ -330,6 +330,7 @@ class OpenAIResponseObject(BaseModel):
     :param object: Object type identifier, always "response"
     :param output: List of generated output items (messages, tool calls, etc.)
     :param parallel_tool_calls: Whether tool calls can be executed in parallel
+    :param prompt_id: (Optional) Reusable prompt created by user
     :param previous_response_id: (Optional) ID of the previous response in a conversation
     :param status: Current status of the response generation
     :param temperature: (Optional) Sampling temperature used for generation
@@ -347,6 +348,7 @@
     output: list[OpenAIResponseOutput]
     parallel_tool_calls: bool = False
     previous_response_id: str | None = None
+    prompt_id: str | None = None
     status: str
     temperature: float | None = None
     # Default to text format to avoid breaking the loading of old responses

llama_stack/core/stack.py

Lines changed: 4 additions & 0 deletions
@@ -314,6 +314,10 @@ def add_internal_implementations(impls: dict[Api, Any], run_config: StackRunConf
     )
     impls[Api.prompts] = prompts_impl
 
+    # Set prompts API on agents provider if it exists
+    if Api.agents in impls and hasattr(impls[Api.agents], "set_prompts_api"):
+        impls[Api.agents].set_prompts_api(prompts_impl)
+
 
 # Produces a stack of providers for the given run config. Not all APIs may be
 # asked for in the run config.

llama_stack/providers/inline/agents/meta_reference/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -21,6 +21,7 @@ async def get_provider_impl(config: MetaReferenceAgentsImplConfig, deps: dict[Ap
         deps[Api.safety],
         deps[Api.tool_runtime],
         deps[Api.tool_groups],
+        None,  # prompts_api will be set later when available
         policy,
     )
     await impl.initialize()
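
Note: taken together with the stack.py hunk above, the wiring is two-phase. The agents provider is constructed before the internal Prompts implementation exists, so it receives None here, and the stack injects the real implementation afterwards via set_prompts_api once impls[Api.prompts] has been registered.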

llama_stack/providers/inline/agents/meta_reference/agents.py

Lines changed: 11 additions & 0 deletions
@@ -37,6 +37,7 @@
     ToolResponseMessage,
     UserMessage,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.safety import Safety
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
@@ -63,6 +64,7 @@ def __init__(
         safety_api: Safety,
         tool_runtime_api: ToolRuntime,
         tool_groups_api: ToolGroups,
+        prompts_api: Prompts | None,
         policy: list[AccessRule],
     ):
         self.config = config
@@ -71,6 +73,7 @@ def __init__(
         self.safety_api = safety_api
         self.tool_runtime_api = tool_runtime_api
         self.tool_groups_api = tool_groups_api
+        self.prompts_api = prompts_api
 
         self.in_memory_store = InmemoryKVStoreImpl()
         self.openai_responses_impl: OpenAIResponsesImpl | None = None
@@ -86,8 +89,14 @@ async def initialize(self) -> None:
             tool_runtime_api=self.tool_runtime_api,
             responses_store=self.responses_store,
             vector_io_api=self.vector_io_api,
+            prompts_api=self.prompts_api,
         )
 
+    def set_prompts_api(self, prompts_api: Prompts) -> None:
+        self.prompts_api = prompts_api
+        if hasattr(self, "openai_responses_impl") and self.openai_responses_impl:
+            self.openai_responses_impl.prompts_api = prompts_api
+
     async def create_agent(
         self,
         agent_config: AgentConfig,
@@ -320,6 +329,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt_id: str | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
@@ -333,6 +343,7 @@ async def create_openai_response(
         return await self.openai_responses_impl.create_openai_response(
             input,
             model,
+            prompt_id,
             instructions,
             previous_response_id,
             store,

llama_stack/providers/inline/agents/meta_reference/responses/openai_responses.py

Lines changed: 27 additions & 0 deletions
@@ -26,8 +26,10 @@
 )
 from llama_stack.apis.inference import (
     Inference,
+    OpenAIMessageParam,
     OpenAISystemMessageParam,
 )
+from llama_stack.apis.prompts import Prompts
 from llama_stack.apis.tools import ToolGroups, ToolRuntime
 from llama_stack.apis.vector_io import VectorIO
 from llama_stack.log import get_logger
@@ -57,12 +59,14 @@ def __init__(
         tool_runtime_api: ToolRuntime,
         responses_store: ResponsesStore,
         vector_io_api: VectorIO,  # VectorIO
+        prompts_api: Prompts,
     ):
         self.inference_api = inference_api
         self.tool_groups_api = tool_groups_api
         self.tool_runtime_api = tool_runtime_api
         self.responses_store = responses_store
         self.vector_io_api = vector_io_api
+        self.prompts_api = prompts_api
         self.tool_executor = ToolExecutor(
             tool_groups_api=tool_groups_api,
             tool_runtime_api=tool_runtime_api,
@@ -97,6 +101,22 @@ async def _prepend_instructions(self, messages, instructions):
         if instructions:
             messages.insert(0, OpenAISystemMessageParam(content=instructions))
 
+    async def _prepend_prompt(self, messages: list[OpenAIMessageParam], prompt_id: str) -> str:
+        if not prompt_id:
+            return None
+
+        try:
+            # Check if prompt exists in Llama Stack and retrieve it
+            prompt = await self.prompts_api.get_prompt(prompt_id)
+            if prompt and prompt.prompt:
+                messages.insert(0, OpenAISystemMessageParam(content=prompt.prompt))
+                logger.info(f"Prompt {prompt_id} found")
+            return prompt_id
+        except ValueError:
+            # Prompt not found, skip prepending
+            logger.warning(f"Prompt {prompt_id} not found, skipping prompt prepending")
+            return None
+
     async def get_openai_response(
         self,
         response_id: str,
@@ -171,6 +191,7 @@ async def create_openai_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt_id: str | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
@@ -187,6 +208,7 @@
         stream_gen = self._create_streaming_response(
             input=input,
             model=model,
+            prompt_id=prompt_id,
             instructions=instructions,
             previous_response_id=previous_response_id,
             store=store,
@@ -215,6 +237,7 @@ async def _create_streaming_response(
         self,
         input: str | list[OpenAIResponseInput],
         model: str,
+        prompt_id: str | None = None,
         instructions: str | None = None,
         previous_response_id: str | None = None,
         store: bool | None = True,
@@ -226,6 +249,9 @@
         # Input preprocessing
         input = await self._prepend_previous_response(input, previous_response_id)
         messages = await convert_response_input_to_chat_messages(input)
+
+        # Prepend reusable prompt (if provided)
+        prompt_id = await self._prepend_prompt(messages, prompt_id)
         await self._prepend_instructions(messages, instructions)
 
         # Structured outputs
@@ -248,6 +274,7 @@
             ctx=ctx,
             response_id=response_id,
             created_at=created_at,
+            prompt_id=prompt_id,
             text=text,
             max_infer_iters=max_infer_iters,
             tool_executor=self.tool_executor,
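
Note on ordering: `_prepend_prompt` runs before `_prepend_instructions`, and both insert at index 0, so when a request carries both a stored prompt and instructions the instructions message ends up ahead of the prompt message. A self-contained illustration of that insertion behaviour (plain strings stand in for OpenAISystemMessageParam objects):

# Illustration only: two successive insert(0, ...) calls reverse the prepend order.
messages = ["<user input>"]
messages.insert(0, "<stored prompt text>")  # _prepend_prompt
messages.insert(0, "<instructions>")        # _prepend_instructions
assert messages == ["<instructions>", "<stored prompt text>", "<user input>"]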

llama_stack/providers/inline/agents/meta_reference/responses/streaming.py

Lines changed: 4 additions & 0 deletions
@@ -57,6 +57,7 @@ def __init__(
         ctx: ChatCompletionContext,
         response_id: str,
         created_at: int,
+        prompt_id: str | None,
         text: OpenAIResponseText,
         max_infer_iters: int,
         tool_executor,  # Will be the tool execution logic from the main class
@@ -65,6 +66,7 @@ def __init__(
         self.ctx = ctx
         self.response_id = response_id
         self.created_at = created_at
+        self.prompt_id = prompt_id
         self.text = text
         self.max_infer_iters = max_infer_iters
         self.tool_executor = tool_executor
@@ -83,6 +85,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
             object="response",
             status="in_progress",
             output=output_messages.copy(),
+            prompt_id=self.prompt_id,
             text=self.text,
         )
 
@@ -157,6 +160,7 @@ async def create_response(self) -> AsyncIterator[OpenAIResponseObjectStream]:
             model=self.ctx.model,
             object="response",
             status="completed",
+            prompt_id=self.prompt_id,
             text=self.text,
             output=output_messages,
         )

tests/unit/providers/agents/meta_reference/test_openai_responses.py

Lines changed: 62 additions & 1 deletion
@@ -40,6 +40,7 @@
     OpenAIResponseFormatText,
     OpenAIUserMessageParam,
 )
+from llama_stack.apis.prompts import Prompt
 from llama_stack.apis.tools.tools import Tool, ToolGroups, ToolInvocationResult, ToolParameter, ToolRuntime
 from llama_stack.core.access_control.access_control import default_policy
 from llama_stack.providers.inline.agents.meta_reference.responses.openai_responses import (
@@ -79,16 +80,28 @@ def mock_vector_io_api():
     return vector_io_api
 
 
+@pytest.fixture
+def mock_prompts_api():
+    prompts_api = AsyncMock()
+    return prompts_api
+
+
 @pytest.fixture
 def openai_responses_impl(
-    mock_inference_api, mock_tool_groups_api, mock_tool_runtime_api, mock_responses_store, mock_vector_io_api
+    mock_inference_api,
+    mock_tool_groups_api,
+    mock_tool_runtime_api,
+    mock_responses_store,
+    mock_vector_io_api,
+    mock_prompts_api,
 ):
     return OpenAIResponsesImpl(
         inference_api=mock_inference_api,
         tool_groups_api=mock_tool_groups_api,
         tool_runtime_api=mock_tool_runtime_api,
         responses_store=mock_responses_store,
         vector_io_api=mock_vector_io_api,
+        prompts_api=mock_prompts_api,
     )
 
 
@@ -868,3 +881,51 @@ async def test_create_openai_response_with_invalid_text_format(openai_responses_
         model=model,
         text=OpenAIResponseText(format={"type": "invalid"}),
     )
+
+
+async def test_create_openai_response_with_prompt(openai_responses_impl, mock_inference_api, mock_prompts_api):
+    """Test creating an OpenAI response with a prompt."""
+    # Setup
+    input_text = "What is the capital of Ireland?"
+    model = "meta-llama/Llama-3.1-8B-Instruct"
+    prompt_id = "pmpt_1234567890abcdef1234567890abcdef1234567890abcdef"
+    prompt = Prompt(
+        prompt="You are a helpful {{ geography }} assistant. Always provide accurate information.",
+        prompt_id=prompt_id,
+        version=1,
+        variables=["geography"],
+        is_default=True,
+    )
+
+    # Mock the prompts API to return the prompt
+    mock_prompts_api.get_prompt.return_value = prompt
+    mock_inference_api.openai_chat_completion.return_value = fake_stream()
+
+    # Execute
+    result = await openai_responses_impl.create_openai_response(
+        input=input_text,
+        model=model,
+        prompt_id=prompt_id,
+    )
+
+    # Verify
+    mock_prompts_api.get_prompt.assert_called_once_with(prompt_id)
+    mock_inference_api.openai_chat_completion.assert_called_once()
+    call_args = mock_inference_api.openai_chat_completion.call_args
+    sent_messages = call_args.kwargs["messages"]
+    assert len(sent_messages) == 2
+
+    # Check that prompt was prepended as a system message
+    system_messages = [msg for msg in sent_messages if msg.role == "system"]
+    assert len(system_messages) == 1
+    assert system_messages[0].content == prompt.prompt
+
+    # Test that user input exists
+    user_messages = [msg for msg in sent_messages if msg.role == "user"]
+    assert len(user_messages) == 1
+    assert user_messages[0].content == input_text
+
+    # Verify the response
+    assert result.model == model
+    assert result.status == "completed"
+    assert result.prompt_id == prompt_id
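
Note: the new test can be run on its own with an invocation along these lines (assumed command, not part of the diff): pytest tests/unit/providers/agents/meta_reference/test_openai_responses.py -k test_create_openai_response_with_prompt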
