Skip to content
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 5 additions & 12 deletions model_gateway/src/routers/common/mcp_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ use smg_mcp::{BuiltinToolType, McpOrchestrator, McpServerBinding, McpServerConfi
use tracing::{debug, warn};

use crate::routers::common::openai_bridge::{
apply_hosted_tool_overrides, extract_hosted_tool_overrides, FormatRegistry, ResponseFormat,
self, apply_hosted_tool_overrides, extract_hosted_tool_overrides, FormatRegistry,
ResponseFormat,
};

/// Default maximum tool loop iterations (safety limit).
Expand Down Expand Up @@ -194,11 +195,8 @@ pub fn collect_builtin_routing(
let mut routing = Vec::new();

for tool in tools {
let builtin_type = match tool {
ResponseTool::WebSearchPreview(_) => BuiltinToolType::WebSearchPreview,
ResponseTool::CodeInterpreter(_) => BuiltinToolType::CodeInterpreter,
ResponseTool::ImageGeneration(_) => BuiltinToolType::ImageGeneration,
_ => continue,
let Some(builtin_type) = openai_bridge::builtin_type_for_response_tool(tool) else {
continue;
};

if let Some((server_name, tool_name)) = mcp_orchestrator.find_builtin_server(builtin_type) {
Expand Down Expand Up @@ -238,12 +236,7 @@ pub fn collect_builtin_routing(
pub fn extract_builtin_types(tools: &[ResponseTool]) -> Vec<BuiltinToolType> {
tools
.iter()
.filter_map(|t| match t {
ResponseTool::WebSearchPreview(_) => Some(BuiltinToolType::WebSearchPreview),
ResponseTool::CodeInterpreter(_) => Some(BuiltinToolType::CodeInterpreter),
ResponseTool::ImageGeneration(_) => Some(BuiltinToolType::ImageGeneration),
_ => None,
})
.filter_map(openai_bridge::builtin_type_for_response_tool)
.collect()
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,10 @@ use super::ResponseFormat;
#[derive(Debug, Clone, Copy)]
pub struct FormatDescriptor {
pub type_str: &'static str,
/// Item-id kind without trailing `_` (e.g. `"ws"`, `"mcp"`). Joined to a
/// suffix at construction sites: `format!("{kind}_{rest}")`. Stored
/// without the underscore so callers that just need the discriminator
/// (allocator prefix, log labels) don't have to trim.
pub id_prefix: &'static str,
pub in_progress_event: &'static str,
/// `None` for formats with no intermediate phase (e.g. Passthrough).
Expand All @@ -24,11 +28,27 @@ pub struct FormatDescriptor {
pub partial_image_event: Option<&'static str>,
}

/// Reverse lookup for routers that have already received an item-type
/// string (`mcp_call`, `web_search_call`, …) from the upstream wire and
/// need to recover the matching `ResponseFormat` to consult the
/// descriptor. Returns `None` for non-format item types like
/// `function_call` or `message`.
pub fn format_from_type_str(type_str: &str) -> Option<ResponseFormat> {
match type_str {
ItemType::MCP_CALL => Some(ResponseFormat::Passthrough),
ItemType::WEB_SEARCH_CALL => Some(ResponseFormat::WebSearchCall),
ItemType::CODE_INTERPRETER_CALL => Some(ResponseFormat::CodeInterpreterCall),
ItemType::FILE_SEARCH_CALL => Some(ResponseFormat::FileSearchCall),
ItemType::IMAGE_GENERATION_CALL => Some(ResponseFormat::ImageGenerationCall),
_ => None,
}
}

pub const fn descriptor(format: ResponseFormat) -> FormatDescriptor {
match format {
ResponseFormat::WebSearchCall => FormatDescriptor {
type_str: ItemType::WEB_SEARCH_CALL,
id_prefix: "ws_",
id_prefix: "ws",
in_progress_event: WebSearchCallEvent::IN_PROGRESS,
searching_event: Some(WebSearchCallEvent::SEARCHING),
completed_event: WebSearchCallEvent::COMPLETED,
Expand All @@ -37,7 +57,7 @@ pub const fn descriptor(format: ResponseFormat) -> FormatDescriptor {
},
ResponseFormat::CodeInterpreterCall => FormatDescriptor {
type_str: ItemType::CODE_INTERPRETER_CALL,
id_prefix: "ci_",
id_prefix: "ci",
in_progress_event: CodeInterpreterCallEvent::IN_PROGRESS,
searching_event: Some(CodeInterpreterCallEvent::INTERPRETING),
completed_event: CodeInterpreterCallEvent::COMPLETED,
Expand All @@ -46,7 +66,7 @@ pub const fn descriptor(format: ResponseFormat) -> FormatDescriptor {
},
ResponseFormat::FileSearchCall => FormatDescriptor {
type_str: ItemType::FILE_SEARCH_CALL,
id_prefix: "fs_",
id_prefix: "fs",
in_progress_event: FileSearchCallEvent::IN_PROGRESS,
searching_event: Some(FileSearchCallEvent::SEARCHING),
completed_event: FileSearchCallEvent::COMPLETED,
Expand All @@ -55,7 +75,7 @@ pub const fn descriptor(format: ResponseFormat) -> FormatDescriptor {
},
ResponseFormat::ImageGenerationCall => FormatDescriptor {
type_str: ItemType::IMAGE_GENERATION_CALL,
id_prefix: "ig_",
id_prefix: "ig",
in_progress_event: ImageGenerationCallEvent::IN_PROGRESS,
searching_event: Some(ImageGenerationCallEvent::GENERATING),
completed_event: ImageGenerationCallEvent::COMPLETED,
Expand All @@ -64,7 +84,7 @@ pub const fn descriptor(format: ResponseFormat) -> FormatDescriptor {
},
ResponseFormat::Passthrough => FormatDescriptor {
type_str: ItemType::MCP_CALL,
id_prefix: "mcp_",
id_prefix: "mcp",
in_progress_event: McpEvent::CALL_IN_PROGRESS,
searching_event: None,
completed_event: McpEvent::CALL_COMPLETED,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::sync::Arc;

use dashmap::DashMap;
use smg_mcp::{inventory::ALIAS_SERVER_KEY, McpServerConfig, QualifiedToolName};
use tracing::debug;

use super::ResponseFormat;

Expand All @@ -16,6 +17,12 @@ use super::ResponseFormat;
/// `QualifiedToolName` returned by `qualified_name_for_exposed` rather than
/// rebuilding one, so we pay the two `Arc<str>` allocations once instead of
/// twice per call.
///
/// Telemetry: when the session knows the tool but the registry doesn't, we
/// log a `debug` event with the qualified name. That asymmetric miss is the
/// fingerprint of a registration-path bug (the orchestrator added the tool
/// but `populate_from_server_config` was never called for the same server),
/// and would otherwise dispatch silently as `mcp_call`.
pub fn lookup_tool_format(
session: &smg_mcp::McpToolSession<'_>,
registry: &FormatRegistry,
Expand All @@ -24,7 +31,17 @@ pub fn lookup_tool_format(
let Some(qn) = session.qualified_name_for_exposed(exposed_name) else {
return ResponseFormat::Passthrough;
};
registry.lookup(&qn)
let format = registry.lookup(&qn);
if format == ResponseFormat::Passthrough && !registry.contains(&qn) {
debug!(
exposed_name = %exposed_name,
server_key = %qn.server_key(),
tool_name = %qn.tool_name(),
"FormatRegistry miss for session-exposed tool — dispatching as Passthrough; \
check that populate_from_server_config ran for this server"
);
}
format
Comment thread
coderabbitai[bot] marked this conversation as resolved.
}

#[derive(Default, Debug, Clone)]
Expand All @@ -48,6 +65,13 @@ impl FormatRegistry {
self.lookup(&QualifiedToolName::new(server_key, tool_name))
}

/// True iff the registry has an explicit entry for `qualified`. Used by
/// [`lookup_tool_format`] to distinguish a registered Passthrough entry
/// from a missing entry that defaulted to Passthrough.
pub fn contains(&self, qualified: &QualifiedToolName) -> bool {
self.formats.contains_key(qualified)
}

fn insert(&self, qualified: QualifiedToolName, format: ResponseFormat) {
self.formats.insert(qualified, format);
}
Expand Down
9 changes: 5 additions & 4 deletions model_gateway/src/routers/common/openai_bridge/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,15 @@ pub mod response_format;
pub mod tool_descriptors;
pub mod transformer;

pub use format_descriptor::{descriptor, FormatDescriptor};
pub use format_descriptor::{descriptor, format_from_type_str, FormatDescriptor};
pub use format_registry::{lookup_tool_format, FormatRegistry};
pub use overrides::{apply_hosted_tool_overrides, extract_hosted_tool_overrides};
pub use response_format::ResponseFormat;
pub use tool_descriptors::{
build_mcp_tool_infos, chat_function_tools, configure_response_tools_approval,
function_tools_json, inject_client_visible_mcp_output_items, mcp_list_tools_item,
mcp_list_tools_json, response_tools, should_hide_output_item_json, should_hide_tool_json,
build_mcp_tool_infos, builtin_type_for_response_tool, chat_function_tools,
configure_response_tools_approval, function_tools_json, inject_client_visible_mcp_output_items,
mcp_list_tools_item, mcp_list_tools_json, response_tools, should_hide_output_item_json,
should_hide_tool_json,
};
pub use transformer::{
compact_image_generation_output, compact_image_generation_outputs_json,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,24 @@ use openai_protocol::{
},
};
use serde_json::{json, Value};
use smg_mcp::{ApprovalMode, McpToolSession};
use smg_mcp::{ApprovalMode, BuiltinToolType, McpToolSession};

/// Map a single `ResponseTool` variant to the `BuiltinToolType` the MCP
/// router uses for resolution. `None` for non-builtin tool kinds (e.g.
/// `Function`, `Mcp`, `Computer`). Centralizing here keeps the four
/// hosted families (web_search_preview / code_interpreter / file_search /
/// image_generation) in lockstep — every caller that previously matched
/// on `ResponseTool::*` independently was at risk of forgetting a variant
/// when a new builtin lands.
pub fn builtin_type_for_response_tool(tool: &ResponseTool) -> Option<BuiltinToolType> {
match tool {
ResponseTool::WebSearchPreview(_) => Some(BuiltinToolType::WebSearchPreview),
ResponseTool::CodeInterpreter(_) => Some(BuiltinToolType::CodeInterpreter),
ResponseTool::FileSearch(_) => Some(BuiltinToolType::FileSearch),
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Avoid forcing MCP for native file_search requests

When a Responses request includes file_search on a deployment without MCP components, this new mapping makes request_uses_mcp_routing() return true; both OpenAI Responses entrypoints then immediately return internal_error while fetching mcp_orchestrator/mcp_format_registry (checked model_gateway/src/routers/openai/responses/non_streaming.rs:64-76 and streaming.rs:1080-1092) before ensure_request_mcp_client() can determine that no MCP file-search server is configured and pass the native tool through. That regresses file-search-only requests that previously flowed to the upstream worker; gate FileSearch on actual MCP routing availability or allow the component-less passthrough path.

Useful? React with 👍 / 👎.

ResponseTool::ImageGeneration(_) => Some(BuiltinToolType::ImageGeneration),
_ => None,
}
}

#[inline]
fn schema_to_value(schema: &serde_json::Map<String, Value>) -> Value {
Expand Down
75 changes: 40 additions & 35 deletions model_gateway/src/routers/grpc/common/responses/streaming.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,25 @@ use crate::routers::{
grpc::harmony::responses::ToolResult,
};

pub(crate) enum OutputItemType {
/// Item-id-prefix discriminator for non-format kinds. Format-driven items
/// derive their prefix from `descriptor(format).id_prefix` instead — keeping
/// the wire-shape mapping in one place.
pub(crate) enum OutputItemKind {
Message,
McpListTools,
McpCall,
FunctionCall,
Reasoning,
WebSearchCall,
CodeInterpreterCall,
FileSearchCall,
ImageGenerationCall,
}

impl OutputItemKind {
fn id_prefix(&self) -> &'static str {
match self {
Self::Message => "msg",
Self::McpListTools => "mcpl",
Self::FunctionCall => "fc",
Self::Reasoning => "rs",
}
}
}

/// Status of an output item
Expand Down Expand Up @@ -531,18 +540,6 @@ impl ResponseStreamEventEmitter {
}
}

/// Map a `ResponseFormat` to the grpc-router-private `OutputItemType` enum.
pub fn output_item_type_for_format(response_format: Option<&ResponseFormat>) -> OutputItemType {
match response_format {
Some(ResponseFormat::WebSearchCall) => OutputItemType::WebSearchCall,
Some(ResponseFormat::CodeInterpreterCall) => OutputItemType::CodeInterpreterCall,
Some(ResponseFormat::FileSearchCall) => OutputItemType::FileSearchCall,
Some(ResponseFormat::ImageGenerationCall) => OutputItemType::ImageGenerationCall,
Some(ResponseFormat::Passthrough) => OutputItemType::McpCall,
None => OutputItemType::FunctionCall,
}
}

// ========================================================================
// Function Call Event Emission Methods
// ========================================================================
Expand Down Expand Up @@ -617,23 +614,12 @@ impl ResponseStreamEventEmitter {
format!("{}_{}", prefix, Uuid::now_v7().simple())
}

/// Allocate next output index and track item
pub fn allocate_output_index(&mut self, item_type: OutputItemType) -> (usize, String) {
/// Allocate next output index and track item, deriving the item-id from
/// `id_prefix` (e.g. `"msg"`, `"mcp"`, `"ws"` — without trailing `_`).
pub fn allocate_output_index_with_prefix(&mut self, id_prefix: &str) -> (usize, String) {
let index = self.next_output_index;
self.next_output_index += 1;

let id_prefix = match &item_type {
OutputItemType::McpListTools => "mcpl",
OutputItemType::McpCall => "mcp",
OutputItemType::FunctionCall => "fc",
OutputItemType::Message => "msg",
OutputItemType::Reasoning => "rs",
OutputItemType::WebSearchCall => "ws",
OutputItemType::CodeInterpreterCall => "ci",
OutputItemType::FileSearchCall => "fs",
OutputItemType::ImageGenerationCall => "ig",
};

let id = Self::generate_item_id(id_prefix);

self.output_items.push(OutputItemState {
Expand All @@ -645,6 +631,25 @@ impl ResponseStreamEventEmitter {
(index, id)
}

/// Convenience: allocate for a non-format kind.
pub fn allocate_output_index(&mut self, kind: OutputItemKind) -> (usize, String) {
self.allocate_output_index_with_prefix(kind.id_prefix())
}

/// Convenience: allocate for a `ResponseFormat`-driven item, falling back
/// to `function_call` (`"fc"`) when no format applies. Mirrors the prior
/// `output_item_type_for_format` mapping but reads the prefix straight
/// off the format descriptor.
pub fn allocate_output_index_for_format(
&mut self,
response_format: Option<ResponseFormat>,
) -> (usize, String) {
let prefix = response_format
.map(|f| descriptor(f).id_prefix)
.unwrap_or("fc");
self.allocate_output_index_with_prefix(prefix)
}

/// Mark output item as completed and store its data
pub fn complete_output_item(&mut self, output_index: usize) {
if let Some(item) = self
Expand Down Expand Up @@ -719,7 +724,7 @@ impl ResponseStreamEventEmitter {
reasoning_content: Option<String>,
) -> Result<(), String> {
// Allocate output index and generate ID
let (output_index, item_id) = self.allocate_output_index(OutputItemType::Reasoning);
let (output_index, item_id) = self.allocate_output_index(OutputItemKind::Reasoning);

// Build reasoning item structure
let item = json!({
Expand Down Expand Up @@ -758,7 +763,7 @@ impl ResponseStreamEventEmitter {
// Allocate output_index and item_id for this message item (once per message)
if self.current_item_id.is_none() {
let (output_index, item_id) =
self.allocate_output_index(OutputItemType::Message);
self.allocate_output_index(OutputItemKind::Message);

// Build message item structure
let item = json!({
Expand Down Expand Up @@ -931,7 +936,7 @@ impl ResponseStreamEventEmitter {
tools: &[mcp::ToolEntry],
tx: &mpsc::UnboundedSender<Result<Bytes, std::io::Error>>,
) -> Result<(), String> {
let (output_index, item_id) = self.allocate_output_index(OutputItemType::McpListTools);
let (output_index, item_id) = self.allocate_output_index(OutputItemKind::McpListTools);

// Build per-tool JSON items
let tool_items = Self::tool_entries_to_json(tools).unwrap_or_else(|e| {
Expand Down
10 changes: 2 additions & 8 deletions model_gateway/src/routers/grpc/common/responses/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -55,14 +55,8 @@ pub(crate) async fn ensure_mcp_connection(
// dispatches.
let has_builtin_tools = tools
.map(|t| {
t.iter().any(|tool| {
matches!(
tool,
ResponseTool::WebSearchPreview(_)
| ResponseTool::CodeInterpreter(_)
| ResponseTool::ImageGeneration(_)
)
})
t.iter()
.any(|tool| openai_bridge::builtin_type_for_response_tool(tool).is_some())
})
.unwrap_or(false);

Expand Down
Loading
Loading