Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 8 additions & 7 deletions docs/reference/mcp-internal-servers.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ servers:
```

In the current implementation, `internal: true` applies only to self-provided
MCP servers declared under `servers:`. It affects final assembled,
non-streaming MCP responses by allowing higher layers to strip internal server
tool lists and tool-call trace items before the response is returned to the
client.
MCP servers declared under `servers:`. The model may still see and call these
tools during gateway-managed tool loops, but OpenAI Responses client-facing
output hides internal non-builtin tool details before returning data to the
client. That includes final non-streaming responses, final streaming
`response.completed` events, live streaming tool-call events, live
`mcp_list_tools` events, and response envelope `tools` / `tool_choice` fields.

This flag does not currently hide streaming output, and it does not apply to
builtin-routed MCP results such as `web_search_call`, `code_interpreter_call`,
or `file_search_call`.
This flag does not apply to builtin-routed MCP results such as
`web_search_call`, `code_interpreter_call`, or `file_search_call`.

This flag is generic. It does not imply any vendor-specific behavior and does
not change transport setup or tool execution on its own.
111 changes: 96 additions & 15 deletions model_gateway/src/routers/openai/mcp/tool_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ pub(crate) async fn execute_streaming_tool_calls(

let response_format = session.tool_response_format(&call.name);
let server_label = session.resolve_tool_server_label(&call.name);
let emit_tool_events = !session.is_internal_non_builtin_tool(&call.name);

let mut arguments: Value = match serde_json::from_str(args_str) {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
Ok(v) => v,
Expand All @@ -221,13 +222,15 @@ pub(crate) async fn execute_streaming_tool_calls(
Value::String(stable_streaming_tool_item_id(&call, &response_format)),
);
}
if !send_tool_call_completion_events(
tx,
&call,
&mcp_call_item,
&response_format,
sequence_number,
) {
if emit_tool_events
&& !send_tool_call_completion_events(
tx,
&call,
&mcp_call_item,
&response_format,
sequence_number,
)
{
return false;
}
state.record_call(
Expand All @@ -242,7 +245,9 @@ pub(crate) async fn execute_streaming_tool_calls(
}
};

if !send_tool_call_intermediate_event(tx, &call, &response_format, sequence_number) {
if emit_tool_events
&& !send_tool_call_intermediate_event(tx, &call, &response_format, sequence_number)
{
return false;
}

Expand Down Expand Up @@ -297,13 +302,15 @@ pub(crate) async fn execute_streaming_tool_calls(
);
}

if !send_tool_call_completion_events(
tx,
&call,
&mcp_call_item,
&response_format,
sequence_number,
) {
if emit_tool_events
&& !send_tool_call_completion_events(
tx,
&call,
&mcp_call_item,
&response_format,
sequence_number,
)
{
return false;
}

Expand Down Expand Up @@ -1440,6 +1447,80 @@ mod tests {
));
}

#[tokio::test]
async fn streaming_tool_execution_suppresses_events_for_internal_non_builtin_tools() {
let orchestrator = McpOrchestrator::new(McpConfig {
servers: vec![McpServerConfig {
name: "internal-server".to_string(),
transport: McpTransport::Sse {
url: "http://localhost:3000/sse".to_string(),
token: None,
headers: Default::default(),
},
proxy: None,
required: false,
tools: None,
builtin_type: None,
builtin_tool_name: None,
internal: true,
}],
..Default::default()
})
.await
.expect("orchestrator");
orchestrator
.tool_inventory()
.insert_entry(ToolEntry::from_server_tool(
"internal-server",
test_tool("internal_search"),
));
let session = McpToolSession::new(
&orchestrator,
vec![McpServerBinding {
label: "internal-label".to_string(),
server_key: "internal-server".to_string(),
allowed_tools: None,
}],
"test-request",
);
let pending_call = super::FunctionCallInProgress {
call_id: "call_internal".to_string(),
name: "internal_search".to_string(),
arguments_buffer: "{not-json".to_string(),
item_id: Some("fc_internal".to_string()),
output_index: 0,
last_obfuscation: None,
assigned_output_index: Some(0),
};
let (tx, mut rx) = mpsc::unbounded_channel();
let mut state = ToolLoopState::new(ResponseInput::Text("hello".to_string()), Vec::new());
let mut sequence_number = 0;

let ok = super::execute_streaming_tool_calls(
vec![pending_call],
&session,
&tx,
&mut state,
&mut sequence_number,
"gpt-5.4",
&[],
None,
)
.await;
drop(tx);

assert!(ok);
assert_eq!(
drain_channel(&mut rx),
Comment thread
zhoug9127 marked this conversation as resolved.
Vec::<String>::new(),
"internal tool execution must not emit streaming tool events"
);
assert_eq!(state.mcp_call_items.len(), 1);
assert!(state.mcp_call_items[0]
.to_string()
.contains("internal_search"));
}

#[test]
fn emits_only_new_binding_when_resume_adds_second_tool_block() {
let existing_labels = HashSet::from(["deepwiki_ask".to_string()]);
Expand Down
Loading
Loading