diff --git a/crates/mcp/src/core/config.rs b/crates/mcp/src/core/config.rs index 9de390098..59ab026c3 100644 --- a/crates/mcp/src/core/config.rs +++ b/crates/mcp/src/core/config.rs @@ -304,6 +304,8 @@ pub enum BuiltinToolType { FileSearch, /// Image generation tool (OpenAI: image_generation) ImageGeneration, + /// Shell tool (OpenAI: shell) + Shell, } impl BuiltinToolType { @@ -314,6 +316,7 @@ impl BuiltinToolType { BuiltinToolType::CodeInterpreter => ResponseFormatConfig::CodeInterpreterCall, BuiltinToolType::FileSearch => ResponseFormatConfig::FileSearchCall, BuiltinToolType::ImageGeneration => ResponseFormatConfig::ImageGenerationCall, + BuiltinToolType::Shell => ResponseFormatConfig::ShellCall, } } } @@ -325,6 +328,7 @@ impl fmt::Display for BuiltinToolType { BuiltinToolType::CodeInterpreter => write!(f, "code_interpreter"), BuiltinToolType::FileSearch => write!(f, "file_search"), BuiltinToolType::ImageGeneration => write!(f, "image_generation"), + BuiltinToolType::Shell => write!(f, "shell"), } } } @@ -355,6 +359,7 @@ pub enum ResponseFormatConfig { CodeInterpreterCall, FileSearchCall, ImageGenerationCall, + ShellCall, } /// Argument mapping configuration for tool aliases. 
@@ -1034,6 +1039,7 @@ tools: ResponseFormatConfig::ImageGenerationCall, "\"image_generation_call\"", ), + (ResponseFormatConfig::ShellCall, "\"shell_call\""), ]; for (format, expected) in formats { @@ -1201,6 +1207,7 @@ policy: (BuiltinToolType::CodeInterpreter, "\"code_interpreter\""), (BuiltinToolType::FileSearch, "\"file_search\""), (BuiltinToolType::ImageGeneration, "\"image_generation\""), + (BuiltinToolType::Shell, "\"shell\""), ]; for (builtin_type, expected) in types { @@ -1230,6 +1237,10 @@ policy: BuiltinToolType::ImageGeneration.response_format(), ResponseFormatConfig::ImageGenerationCall ); + assert_eq!( + BuiltinToolType::Shell.response_format(), + ResponseFormatConfig::ShellCall + ); } #[test] @@ -1522,5 +1533,10 @@ servers: "code_interpreter" ); assert_eq!(BuiltinToolType::FileSearch.to_string(), "file_search"); + assert_eq!( + BuiltinToolType::ImageGeneration.to_string(), + "image_generation" + ); + assert_eq!(BuiltinToolType::Shell.to_string(), "shell"); } } diff --git a/crates/mcp/src/core/session.rs b/crates/mcp/src/core/session.rs index 8e72544e5..15af6e10c 100644 --- a/crates/mcp/src/core/session.rs +++ b/crates/mcp/src/core/session.rs @@ -864,6 +864,7 @@ impl<'a> McpToolSession<'a> { BuiltinToolType::WebSearchPreview, BuiltinToolType::CodeInterpreter, BuiltinToolType::FileSearch, + BuiltinToolType::Shell, ] .into_iter() .filter_map(|builtin_type| orchestrator.find_builtin_server(builtin_type)) diff --git a/crates/mcp/src/transform/transformer.rs b/crates/mcp/src/transform/transformer.rs index dc44c1d0f..a02700266 100644 --- a/crates/mcp/src/transform/transformer.rs +++ b/crates/mcp/src/transform/transformer.rs @@ -2,8 +2,8 @@ use openai_protocol::responses::{ CodeInterpreterCallStatus, CodeInterpreterOutput, FileSearchCallStatus, FileSearchResult, - ImageGenerationCallStatus, ResponseOutputItem, WebSearchAction, WebSearchCallStatus, - WebSearchSource, + ImageGenerationCallStatus, ResponseOutputItem, ShellCallAction, ShellCallStatus, + 
WebSearchAction, WebSearchCallStatus, WebSearchSource, }; use tracing::warn; @@ -101,6 +101,7 @@ impl ResponseTransformer { ResponseFormat::ImageGenerationCall => { Self::to_image_generation_call(result, tool_call_id) } + ResponseFormat::ShellCall => Self::to_shell_call(tool_call_id, arguments), } } @@ -586,6 +587,54 @@ impl ResponseTransformer { attributes: None, }) } + + /// Transform to shell_call output. + fn to_shell_call(tool_call_id: &str, arguments: &str) -> ResponseOutputItem { + let action = parse_shell_call_action(arguments); + + ResponseOutputItem::ShellCall { + id: normalize_shell_call_id(tool_call_id), + call_id: tool_call_id.to_string(), + action, + environment: None, + status: ShellCallStatus::Completed, + created_by: None, + } + } +} + +fn parse_shell_call_action(arguments: &str) -> ShellCallAction { + let Ok(value) = serde_json::from_str::<serde_json::Value>(arguments) else { + warn!("Failed to parse shell_call arguments as JSON; emitting empty action"); + return empty_shell_call_action(); + }; + + let Some(object) = value.as_object() else { + warn!("Expected shell_call arguments to be a JSON object; emitting empty action"); + return empty_shell_call_action(); + }; + + let action = serde_json::json!({ + "commands": object.get("commands").cloned().unwrap_or_else(|| serde_json::json!([])), + "max_output_length": object.get("max_output_length").cloned().unwrap_or(serde_json::Value::Null), + "timeout_ms": object.get("timeout_ms").cloned().unwrap_or(serde_json::Value::Null), + }); + + serde_json::from_value::<ShellCallAction>(action).unwrap_or_else(|e| { + warn!( + error = %e, + "Failed to parse shell_call action fields; emitting empty action" + ); + empty_shell_call_action() + }) +} + +fn empty_shell_call_action() -> ShellCallAction { + ShellCallAction { + commands: Vec::new(), + max_output_length: None, + timeout_ms: None, + } } /// Strip the base64 `result` payload from an `ImageGenerationCall` output @@ -633,6 +682,18 @@ fn parse_text_block_payload(item: &serde_json::Value) -> Option
+fn normalize_shell_call_id(source_id: &str) -> String { + if source_id.starts_with("sc_") { + return source_id.to_string(); + } + + source_id + .strip_prefix("fc_") + .or_else(|| source_id.strip_prefix("call_")) + .map(|stripped| format!("sc_{stripped}")) + .unwrap_or_else(|| format!("sc_{source_id}")) +} + #[cfg(test)] mod tests { use serde_json::json; @@ -1022,6 +1083,58 @@ mod tests { } } + #[test] + fn test_shell_call_transform() { + let transformed = ResponseTransformer::transform( + &json!({}), + &ResponseFormat::ShellCall, + "call-shell-1", + "server", + "shell", + r#"{"commands":["echo hello"],"timeout_ms":1000}"#, + ); + + match transformed { + ResponseOutputItem::ShellCall { + id, + call_id, + action, + environment, + status, + .. + } => { + assert_eq!(id, "sc_call-shell-1"); + assert_eq!(call_id, "call-shell-1"); + assert_eq!(action.commands, vec!["echo hello"]); + assert_eq!(action.timeout_ms, Some(1000)); + assert!(environment.is_none()); + assert_eq!(status, ShellCallStatus::Completed); + } + _ => panic!("Expected ShellCall"), + } + } + + #[test] + fn test_shell_call_transform_preserves_action_with_dispatch_metadata() { + let transformed = ResponseTransformer::transform( + &json!({}), + &ResponseFormat::ShellCall, + "call-shell-2", + "server", + "shell", + r#"{"commands":["pwd"],"timeout_ms":500,"user":"request-user"}"#, + ); + + match transformed { + ResponseOutputItem::ShellCall { action, .. 
} => { + assert_eq!(action.commands, vec!["pwd"]); + assert_eq!(action.timeout_ms, Some(500)); + assert_eq!(action.max_output_length, None); + } + _ => panic!("Expected ShellCall"), + } + } + #[test] fn test_file_search_transform() { let result = json!({ diff --git a/crates/mcp/src/transform/types.rs b/crates/mcp/src/transform/types.rs index f6379410e..19d98a05f 100644 --- a/crates/mcp/src/transform/types.rs +++ b/crates/mcp/src/transform/types.rs @@ -19,6 +19,8 @@ pub enum ResponseFormat { FileSearchCall, /// Transform to OpenAI image_generation_call format ImageGenerationCall, + /// Transform to OpenAI shell_call format + ShellCall, } impl ResponseFormat { @@ -36,6 +38,7 @@ impl ResponseFormat { ResponseFormat::CodeInterpreterCall => Some(BuiltinToolType::CodeInterpreter), ResponseFormat::FileSearchCall => Some(BuiltinToolType::FileSearch), ResponseFormat::ImageGenerationCall => Some(BuiltinToolType::ImageGeneration), + ResponseFormat::ShellCall => Some(BuiltinToolType::Shell), } } } @@ -48,6 +51,7 @@ impl From<ResponseFormatConfig> for ResponseFormat { ResponseFormatConfig::CodeInterpreterCall => ResponseFormat::CodeInterpreterCall, ResponseFormatConfig::FileSearchCall => ResponseFormat::FileSearchCall, ResponseFormatConfig::ImageGenerationCall => ResponseFormat::ImageGenerationCall, + ResponseFormatConfig::ShellCall => ResponseFormat::ShellCall, } } } @@ -70,6 +74,7 @@ mod tests { ResponseFormat::ImageGenerationCall, "\"image_generation_call\"", ), + (ResponseFormat::ShellCall, "\"shell_call\""), ]; for (format, expected) in formats { @@ -95,6 +100,7 @@ mod tests { BuiltinToolType::CodeInterpreter, BuiltinToolType::FileSearch, BuiltinToolType::ImageGeneration, + BuiltinToolType::Shell, ]; for kind in kinds { let fmt: ResponseFormat = kind.response_format().into(); diff --git a/model_gateway/src/routers/common/mcp_utils.rs b/model_gateway/src/routers/common/mcp_utils.rs index dd872f51b..5109e3aa3 100644 --- a/model_gateway/src/routers/common/mcp_utils.rs +++ 
b/model_gateway/src/routers/common/mcp_utils.rs @@ -188,6 +188,7 @@ pub fn collect_builtin_routing( ResponseTool::WebSearchPreview(_) => BuiltinToolType::WebSearchPreview, ResponseTool::CodeInterpreter(_) => BuiltinToolType::CodeInterpreter, ResponseTool::ImageGeneration(_) => BuiltinToolType::ImageGeneration, + ResponseTool::Shell(_) => BuiltinToolType::Shell, _ => continue, }; @@ -229,6 +230,7 @@ pub fn extract_builtin_types(tools: &[ResponseTool]) -> Vec<BuiltinToolType> { ResponseTool::WebSearchPreview(_) => Some(BuiltinToolType::WebSearchPreview), ResponseTool::CodeInterpreter(_) => Some(BuiltinToolType::CodeInterpreter), ResponseTool::ImageGeneration(_) => Some(BuiltinToolType::ImageGeneration), + ResponseTool::Shell(_) => Some(BuiltinToolType::Shell), _ => None, }) .collect() @@ -414,7 +416,7 @@ mod tests { common::Function, responses::{ CodeInterpreterTool, FunctionTool, ImageGenerationTool, McpTool, ResponseTool, - WebSearchPreviewTool, + ShellTool, WebSearchPreviewTool, }, }; use serde_json::json; @@ -699,6 +701,51 @@ mod tests { ); } + #[tokio::test] + async fn test_collect_builtin_routing_shell() { + let mut shell_tools = HashMap::new(); + shell_tools.insert( + "execute_shell_commands".to_string(), + ToolConfig { + response_format: ResponseFormatConfig::ShellCall, + ..Default::default() + }, + ); + + let config = McpConfig { + servers: vec![McpServerConfig { + name: "shell-server".to_string(), + transport: McpTransport::Streamable { + url: "http://localhost:9996/shell".to_string(), + token: None, + headers: HashMap::new(), + }, + proxy: None, + required: false, + tools: Some(shell_tools), + builtin_type: Some(BuiltinToolType::Shell), + builtin_tool_name: Some("execute_shell_commands".to_string()), + internal: false, + }], + pool: Default::default(), + proxy: None, + warmup: Vec::new(), + inventory: Default::default(), + policy: Default::default(), + }; + + let orchestrator = Arc::new(McpOrchestrator::new(config).await.unwrap()); + let tools = 
vec![ResponseTool::Shell(ShellTool::default())]; + + let routing = collect_builtin_routing(&orchestrator, Some(&tools)); + + assert_eq!(routing.len(), 1); + assert_eq!(routing[0].builtin_type, BuiltinToolType::Shell); + assert_eq!(routing[0].server_name, "shell-server"); + assert_eq!(routing[0].tool_name, "execute_shell_commands"); + assert_eq!(routing[0].response_format, ResponseFormat::ShellCall); + } + // ========================================================================= // ensure_request_mcp_client tests // ========================================================================= diff --git a/model_gateway/src/routers/grpc/common/responses/streaming.rs b/model_gateway/src/routers/grpc/common/responses/streaming.rs index 5cc119df8..2ae2b34f1 100644 --- a/model_gateway/src/routers/grpc/common/responses/streaming.rs +++ b/model_gateway/src/routers/grpc/common/responses/streaming.rs @@ -34,6 +34,7 @@ pub(crate) enum OutputItemType { CodeInterpreterCall, FileSearchCall, ImageGenerationCall, + ShellCall, } /// Status of an output item @@ -477,6 +478,7 @@ impl ResponseStreamEventEmitter { ResponseFormat::CodeInterpreterCall => CodeInterpreterCallEvent::IN_PROGRESS, ResponseFormat::FileSearchCall => FileSearchCallEvent::IN_PROGRESS, ResponseFormat::ImageGenerationCall => ImageGenerationCallEvent::IN_PROGRESS, + ResponseFormat::ShellCall => McpEvent::CALL_IN_PROGRESS, ResponseFormat::Passthrough => McpEvent::CALL_IN_PROGRESS, }; self.emit_tool_event(event_type, output_index, item_id) @@ -499,6 +501,7 @@ impl ResponseStreamEventEmitter { ResponseFormat::CodeInterpreterCall => CodeInterpreterCallEvent::INTERPRETING, ResponseFormat::FileSearchCall => FileSearchCallEvent::SEARCHING, ResponseFormat::ImageGenerationCall => ImageGenerationCallEvent::GENERATING, + ResponseFormat::ShellCall => return None, ResponseFormat::Passthrough => return None, }; Some(self.emit_tool_event(event_type, output_index, item_id)) @@ -550,6 +553,7 @@ impl ResponseStreamEventEmitter { 
ResponseFormat::CodeInterpreterCall => CodeInterpreterCallEvent::COMPLETED, ResponseFormat::FileSearchCall => FileSearchCallEvent::COMPLETED, ResponseFormat::ImageGenerationCall => ImageGenerationCallEvent::COMPLETED, + ResponseFormat::ShellCall => McpEvent::CALL_COMPLETED, ResponseFormat::Passthrough => McpEvent::CALL_COMPLETED, }; self.emit_tool_event(event_type, output_index, item_id) @@ -566,6 +570,7 @@ impl ResponseStreamEventEmitter { Some(ResponseFormat::CodeInterpreterCall) => "code_interpreter_call", Some(ResponseFormat::FileSearchCall) => "file_search_call", Some(ResponseFormat::ImageGenerationCall) => "image_generation_call", + Some(ResponseFormat::ShellCall) => "shell_call", Some(ResponseFormat::Passthrough) => "mcp_call", None => "function_call", } @@ -578,6 +583,7 @@ impl ResponseStreamEventEmitter { Some(ResponseFormat::CodeInterpreterCall) => OutputItemType::CodeInterpreterCall, Some(ResponseFormat::FileSearchCall) => OutputItemType::FileSearchCall, Some(ResponseFormat::ImageGenerationCall) => OutputItemType::ImageGenerationCall, + Some(ResponseFormat::ShellCall) => OutputItemType::ShellCall, Some(ResponseFormat::Passthrough) => OutputItemType::McpCall, None => OutputItemType::FunctionCall, } @@ -672,6 +678,7 @@ impl ResponseStreamEventEmitter { OutputItemType::CodeInterpreterCall => "ci", OutputItemType::FileSearchCall => "fs", OutputItemType::ImageGenerationCall => "ig", + OutputItemType::ShellCall => "sc", }; let id = Self::generate_item_id(id_prefix); diff --git a/model_gateway/src/routers/grpc/harmony/streaming.rs b/model_gateway/src/routers/grpc/harmony/streaming.rs index 0515a7185..500ee98aa 100644 --- a/model_gateway/src/routers/grpc/harmony/streaming.rs +++ b/model_gateway/src/routers/grpc/harmony/streaming.rs @@ -48,7 +48,7 @@ use crate::{ /// `mcp_call.arguments.delta` / `function_call.arguments.delta` events. 
/// /// Hosted built-in tools (`web_search_call`, `code_interpreter_call`, -/// `file_search_call`, `image_generation_call`) instead surface their +/// `file_search_call`, `image_generation_call`, `shell_call`) instead surface their /// progress through structured events emitted by the shared /// [`ResponseStreamEventEmitter`] helpers (`emit_tool_call_in_progress`, /// `emit_tool_call_searching`, `emit_tool_call_completed` — plus @@ -64,7 +64,8 @@ fn streams_arguments(response_format: Option<&ResponseFormat>) -> bool { Some(ResponseFormat::WebSearchCall) | Some(ResponseFormat::CodeInterpreterCall) | Some(ResponseFormat::FileSearchCall) - | Some(ResponseFormat::ImageGenerationCall) => false, + | Some(ResponseFormat::ImageGenerationCall) + | Some(ResponseFormat::ShellCall) => false, } } @@ -1119,7 +1120,8 @@ mod tests { ResponseFormat::WebSearchCall | ResponseFormat::CodeInterpreterCall | ResponseFormat::FileSearchCall - | ResponseFormat::ImageGenerationCall => false, + | ResponseFormat::ImageGenerationCall + | ResponseFormat::ShellCall => false, } } @@ -1167,5 +1169,12 @@ mod tests { "image_generation_call must ride the structured-event path", ); assert!(!expected_streams_arguments(&image_generation)); + + let shell = ResponseFormat::ShellCall; + assert!( + !streams_arguments(Some(&shell)), + "shell_call must ride the structured-event path", + ); + assert!(!expected_streams_arguments(&shell)); } } diff --git a/model_gateway/src/routers/openai/mcp/tool_loop.rs b/model_gateway/src/routers/openai/mcp/tool_loop.rs index 730cdd430..3c27e2aea 100644 --- a/model_gateway/src/routers/openai/mcp/tool_loop.rs +++ b/model_gateway/src/routers/openai/mcp/tool_loop.rs @@ -511,6 +511,7 @@ fn send_tool_call_intermediate_event( // it streams preview chunks; the tool_loop path only emits the coarse // in_progress → generating → completed sequence. 
ResponseFormat::ImageGenerationCall => ImageGenerationCallEvent::GENERATING, + ResponseFormat::ShellCall => return true, ResponseFormat::Passthrough => return true, // mcp_call has no intermediate event }; @@ -604,6 +605,7 @@ fn stable_streaming_tool_item_id( // (`to_image_generation_call`) and the 2-letter convention used by // the other hosted tool formats. ResponseFormat::ImageGenerationCall => normalize_tool_item_id_with_prefix(source_id, "ig_"), + ResponseFormat::ShellCall => normalize_tool_item_id_with_prefix(source_id, "sc_"), } } @@ -625,7 +627,8 @@ fn non_streaming_tool_item_id_source(item_id: &str, response_format: &ResponseFo ResponseFormat::WebSearchCall | ResponseFormat::CodeInterpreterCall | ResponseFormat::FileSearchCall - | ResponseFormat::ImageGenerationCall => item_id + | ResponseFormat::ImageGenerationCall + | ResponseFormat::ShellCall => item_id .strip_prefix("fc_") .or_else(|| item_id.strip_prefix("call_")) .unwrap_or(item_id)