diff --git a/rust/cedar-policy-mcp-schema-generator/CHANGELOG.md b/rust/cedar-policy-mcp-schema-generator/CHANGELOG.md index faa9dcb..cef8cfb 100644 --- a/rust/cedar-policy-mcp-schema-generator/CHANGELOG.md +++ b/rust/cedar-policy-mcp-schema-generator/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - Adds the ability to convert a MCP tool request (and optionally response) to a Cedar request and entity data compliant to the generated Schema. - Adds support for JSON Schema tuples in MCP tool schemas, which are translated to a record of projections. +- Adds `deduplicate_entity_types` option to consolidate equivalent enum entity types (same name and variants) into a single definition at the lowest common ancestor namespace. ### Changed - Type arrays in MCP tool schemas now result in records of optional fields (encoding union of types), rather than records of projections (encoding tuples). diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_collision_existing_entity.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_collision_existing_entity.cedarschema new file mode 100644 index 0000000..4d23861 --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_collision_existing_entity.cedarschema @@ -0,0 +1,51 @@ +namespace MyMcpServer::tool_a::Input { + entity McpServer enum ["primary", "secondary"]; +} + +namespace MyMcpServer::tool_b::Input { + entity McpServer enum ["primary", "secondary"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + McpServer?: MyMcpServer::tool_a::Input::McpServer, + query: String + }; + + type tool_bInput = { + McpServer?: MyMcpServer::tool_b::Input::McpServer, + data: String + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + action "call_tool"; + + action "tool_a" in [Action::"call_tool"] 
appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action "tool_b" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_collision_existing_entity.json b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_collision_existing_entity.json new file mode 100644 index 0000000..988c810 --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_collision_existing_entity.json @@ -0,0 +1,48 @@ +{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A with enum named 'McpServer' that collides with existing entity in LCA", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "McpServer": { + "type": "string", + "enum": ["primary", "secondary"], + "description": "Enum that collides with existing McpServer entity type" + }, + "query": { + "type": "string", + "description": "The query string" + } + }, + "required": ["query"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B with same enum named 'McpServer'", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "McpServer": { + "type": "string", + "enum": ["primary", "secondary"], + "description": "Same enum that collides with existing McpServer entity type" + }, + "data": { + "type": "string", + "description": "Some data" + } + }, + "required": ["data"] + } + } + } + ] + } +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants.cedarschema new file mode 100644 index 0000000..39cea8a --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants.cedarschema @@ -0,0 +1,51 @@ +namespace 
MyMcpServer::tool_a::Input { + entity mode enum ["fast", "slow"]; +} + +namespace MyMcpServer::tool_b::Input { + entity mode enum ["sync", "async"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + mode?: MyMcpServer::tool_a::Input::mode, + query: String + }; + + type tool_bInput = { + mode?: MyMcpServer::tool_b::Input::mode, + query: String + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + action "call_tool"; + + action "tool_a" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action "tool_b" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants.json b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants.json new file mode 100644 index 0000000..aa73f2a --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants.json @@ -0,0 +1,48 @@ +{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A with enum 'mode' having variants fast/slow", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["fast", "slow"], + "description": "Mode for tool A" + }, + "query": { + "type": "string", + "description": "The query string" + } + }, + "required": ["query"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B with enum 'mode' having different variants sync/async", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "mode": { + "type": "string", + "enum": ["sync", "async"], + "description": "Mode for tool B - different variants from tool A" + }, + "query": { + 
"type": "string", + "description": "The query string" + } + }, + "required": ["query"] + } + } + } + ] + } +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants_flat.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants_flat.cedarschema new file mode 100644 index 0000000..da86d3c --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_same_name_different_variants_flat.cedarschema @@ -0,0 +1,47 @@ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + mode?: MyMcpServer::tool_a_Input_mode, + query: String + }; + + type tool_bInput = { + mode?: MyMcpServer::tool_b_Input_mode, + query: String + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity tool_a_Input_mode enum ["fast", "slow"]; + + entity tool_b_Input_mode enum ["sync", "async"]; + + action "call_tool"; + + action "tool_a" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action "tool_b" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca.cedarschema new file mode 100644 index 0000000..5739c83 --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca.cedarschema @@ -0,0 +1,61 @@ +namespace MyMcpServer::tool_x::Input::B { + entity C = { + priority?: MyMcpServer::tool_x::Input::priority, + value_c: String + }; + + entity D = { + priority?: MyMcpServer::tool_x::Input::priority, + value_d: Long + }; +} + +namespace MyMcpServer::tool_x::Input::E { + entity F = { + 
priority?: MyMcpServer::tool_x::Input::priority, + value_f: Bool + }; +} + +namespace MyMcpServer::tool_x::Input { + entity B = { + C: MyMcpServer::tool_x::Input::B::C, + D: MyMcpServer::tool_x::Input::B::D + }; + + entity E = { + F: MyMcpServer::tool_x::Input::E::F + }; + + entity priority enum ["high", "medium", "low"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_xInput = { + B: MyMcpServer::tool_x::Input::B, + E: MyMcpServer::tool_x::Input::E + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + action "call_tool"; + + action "tool_x" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_xInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca.json b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca.json new file mode 100644 index 0000000..497386c --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca.json @@ -0,0 +1,75 @@ +{ + "result": { + "tools": [ + { + "name": "tool_x", + "description": "Tool X with same enum at different nested depths to test LCA computation", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "B": { + "type": "object", + "properties": { + "C": { + "type": "object", + "properties": { + "priority": { + "type": "string", + "enum": ["high", "medium", "low"], + "description": "Shared enum in B::C" + }, + "value_c": { + "type": "string", + "description": "A value in C" + } + }, + "required": ["value_c"] + }, + "D": { + "type": "object", + "properties": { + "priority": { + "type": "string", + "enum": ["high", "medium", "low"], + "description": "Shared enum in B::D" + }, + "value_d": { + "type": "integer", + "description": "A value in D" + } + }, + "required": ["value_d"] + } + }, + "required": ["C", "D"] + }, + "E": { + 
"type": "object", + "properties": { + "F": { + "type": "object", + "properties": { + "priority": { + "type": "string", + "enum": ["high", "medium", "low"], + "description": "Shared enum in E::F" + }, + "value_f": { + "type": "boolean", + "description": "A value in F" + } + }, + "required": ["value_f"] + } + }, + "required": ["F"] + } + }, + "required": ["B", "E"] + } + } + } + ] + } +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca_flat.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca_flat.cedarschema new file mode 100644 index 0000000..234678b --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_three_way_lca_flat.cedarschema @@ -0,0 +1,55 @@ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_xInput = { + B: MyMcpServer::tool_x_Input_B, + E: MyMcpServer::tool_x_Input_E + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity tool_x_Input_B = { + C: MyMcpServer::tool_x_Input_B_C, + D: MyMcpServer::tool_x_Input_B_D + }; + + entity tool_x_Input_B_C = { + priority?: MyMcpServer::tool_x_Input_priority, + value_c: String + }; + + entity tool_x_Input_B_D = { + priority?: MyMcpServer::tool_x_Input_priority, + value_d: Long + }; + + entity tool_x_Input_E = { + F: MyMcpServer::tool_x_Input_E_F + }; + + entity tool_x_Input_E_F = { + priority?: MyMcpServer::tool_x_Input_priority, + value_f: Bool + }; + + entity tool_x_Input_priority enum ["high", "medium", "low"]; + + action "call_tool"; + + action "tool_x" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_xInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools.cedarschema new file mode 100644 index 0000000..482a9ba 
--- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools.cedarschema @@ -0,0 +1,73 @@ +namespace MyMcpServer::tool_a::Input { + entity mode_a enum ["fast", "slow"]; +} + +namespace MyMcpServer::tool_b::Input { + entity mode_b enum ["sync", "async"]; +} + +namespace MyMcpServer::tool_c::Input { + entity format enum ["json", "xml"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + format?: MyMcpServer::format, + mode_a?: MyMcpServer::tool_a::Input::mode_a, + query: String + }; + + type tool_bInput = { + format?: MyMcpServer::format, + mode_b?: MyMcpServer::tool_b::Input::mode_b, + query: String + }; + + type tool_cInput = { + format?: MyMcpServer::tool_c::Input::format, + query: String + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity format enum ["markdown", "text"]; + + action "call_tool"; + + action "tool_a" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action "tool_b" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; + + action "tool_c" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_cInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools.json b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools.json new file mode 100644 index 0000000..33de8e1 --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools.json @@ -0,0 +1,79 @@ +{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A for testing deduplication", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", 
+ "enum": ["markdown", "text"], + "description": "Output format" + }, + "mode_a": { + "type": "string", + "enum": ["fast", "slow"], + "description": "Processing mode for tool A" + }, + "query": { + "type": "string", + "description": "The query string" + } + }, + "required": ["query"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B for testing deduplication", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["markdown", "text"], + "description": "Output format" + }, + "mode_b": { + "type": "string", + "enum": ["sync", "async"], + "description": "Processing mode for tool B" + }, + "query": { + "type": "string", + "description": "The query string" + } + }, + "required": ["query"] + } + } + }, + { + "name": "tool_c", + "description": "Tool C with different format variants", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["json", "xml"], + "description": "Output format for tool C" + }, + "query": { + "type": "string", + "description": "The query string" + } + }, + "required": ["query"] + } + } + } + ] + } +} diff --git a/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools_flat.cedarschema b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools_flat.cedarschema new file mode 100644 index 0000000..251da62 --- /dev/null +++ b/rust/cedar-policy-mcp-schema-generator/examples/dedup/dedup_tools_flat.cedarschema @@ -0,0 +1,67 @@ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + format?: MyMcpServer::format, + mode_a?: MyMcpServer::tool_a_Input_mode_a, + query: String + }; + + type tool_bInput = { + format?: MyMcpServer::format, + mode_b?: MyMcpServer::tool_b_Input_mode_b, + query: String + }; + + type tool_cInput = { + format?: MyMcpServer::tool_c_Input_format, + query: String + }; + + entity McpServer; + + entity User = { + id: 
String, + username: String + }; + + entity format enum ["markdown", "text"]; + + entity tool_a_Input_mode_a enum ["fast", "slow"]; + + entity tool_b_Input_mode_b enum ["sync", "async"]; + + entity tool_c_Input_format enum ["json", "xml"]; + + action "call_tool"; + + action "tool_a" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action "tool_b" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; + + action "tool_c" in [Action::"call_tool"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_cInput, + session: CommonContext + } + }; +} diff --git a/rust/cedar-policy-mcp-schema-generator/src/cli/args.rs b/rust/cedar-policy-mcp-schema-generator/src/cli/args.rs index 8863523..2b87e8e 100644 --- a/rust/cedar-policy-mcp-schema-generator/src/cli/args.rs +++ b/rust/cedar-policy-mcp-schema-generator/src/cli/args.rs @@ -86,6 +86,11 @@ pub(crate) struct ConfigOptions { /// range than decimals. Decimals are limited between [-922337203685477.5808, 922337203685477.5807]. #[arg(long, default_value_t = false)] pub(crate) encode_numbers_as_decimal: bool, + /// Whether to deduplicate entity types with equivalent definitions across tools, placing + /// the shared type in the lowest common ancestor namespace (default: false). Currently + /// applies to enum entity types matched by name and variant values. 
+ #[arg(long, default_value_t = false)] + pub(crate) deduplicate_entity_types: bool, } #[derive(ValueEnum, Clone, Copy, Debug, Serialize)] diff --git a/rust/cedar-policy-mcp-schema-generator/src/cli/exec.rs b/rust/cedar-policy-mcp-schema-generator/src/cli/exec.rs index c7d9f1e..8cf85dd 100644 --- a/rust/cedar-policy-mcp-schema-generator/src/cli/exec.rs +++ b/rust/cedar-policy-mcp-schema-generator/src/cli/exec.rs @@ -36,6 +36,7 @@ fn get_config(config_options: &ConfigOptions) -> SchemaGeneratorConfig { .erase_annotations(!config_options.keep_annotations) .flatten_namespaces(config_options.flatten_namespaces) .encode_numbers_as_decimal(config_options.encode_numbers_as_decimal) + .deduplicate_entity_types(config_options.deduplicate_entity_types) } fn read_schema(file: impl AsRef) -> Result, CliError> { diff --git a/rust/cedar-policy-mcp-schema-generator/src/generator/request.rs b/rust/cedar-policy-mcp-schema-generator/src/generator/request.rs index 95c6c82..e989943 100644 --- a/rust/cedar-policy-mcp-schema-generator/src/generator/request.rs +++ b/rust/cedar-policy-mcp-schema-generator/src/generator/request.rs @@ -26,6 +26,7 @@ use cedar_policy_core::validator::ValidatorSchema; use chrono::{DateTime, NaiveDate, NaiveDateTime, TimeZone, Utc}; use super::identifiers; +use super::schema::{DeduplicatedEntityType, EntityTypeFingerprint}; use crate::{RequestGeneratorError, SchemaGeneratorConfig}; use mcp_tools_sdk::data::{Input, Output, TypedValue}; @@ -60,6 +61,8 @@ pub struct RequestGenerator { tools: ServerDescription, root_namespace: Option, schema: ValidatorSchema, + /// Resolved deduplication decisions from the schema generator. 
+ resolved_dedup: Option>, } #[derive(Clone, Debug)] @@ -104,12 +107,14 @@ impl RequestGenerator { tools: ServerDescription, root_namespace: Option, schema: ValidatorSchema, + resolved_dedup: Option>, ) -> Self { Self { config, tools, root_namespace, schema, + resolved_dedup, } } @@ -455,9 +460,26 @@ impl RequestGenerator { } TypedValue::Enum(s) => { let ty: EntityType = ty_name.parse()?; - let ty = ty.qualify_with(namespace); + + // Check if this enum was deduplicated to a different namespace. + // Match by base_name AND verify the current namespace is a source, + // to avoid false matches with same-named enums that have different variants. + let qualified_ty = if let Some(ref resolved) = self.resolved_dedup { + let dedup_info = resolved.iter().find(|(fp, info)| { + fp.base_name().to_string() == ty_name + && info.source_namespaces.contains(&namespace.cloned()) + }); + if let Some((_, info)) = dedup_info { + ty.qualify_with(info.lca_namespace.as_ref()) + } else { + ty.qualify_with(namespace) + } + } else { + ty.qualify_with(namespace) + }; + let eid = Eid::new(s.as_str()); - let euid = EntityUID::from_components(ty, eid, None); + let euid = EntityUID::from_components(qualified_ty, eid, None); let euid = if self.config.flatten_namespaces { flatten_name(euid) } else { @@ -2407,4 +2429,204 @@ mod test { "Invalid principal type should produce an error" ); } + + #[test] + fn test_generate_request_dedup_enum_resolves_to_lca() { + // Two tools share `format` enum ["markdown", "text"]. With dedup enabled, + // the request generator should resolve the enum to the LCA namespace (Test) + // rather than the tool-local namespace. 
+ let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["markdown", "text"] + } + }, + "required": ["format"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["markdown", "text"] + } + }, + "required": ["format"] + } + } + } + ] + } + }"#; + + let config = SchemaGeneratorConfig::default().deduplicate_entity_types(true); + let mut schema_generator = get_schema_generator(config); + let description = + ServerDescription::from_json_str(tools_json).expect("Failed to parse tools JSON"); + schema_generator + .add_actions_from_server_description(&description) + .expect("Failed to add server description"); + + let request_generator = schema_generator + .new_request_generator() + .expect("Failed to create request generator"); + + let input = Input::from_json_str( + r#"{ + "params": { + "tool": "tool_a", + "args": { "format": "markdown" } + } + }"#, + ) + .expect("Failed to parse input"); + + let principal = r#"Test::user::"""#.parse::().unwrap(); + let resource = r#"Test::resource::"""#.parse::().unwrap(); + + let (request, _entities) = request_generator + .generate_request( + principal, + resource, + Context::empty(), + Entities::new(), + &input, + None, + ) + .expect("Failed to generate request"); + + // The enum should resolve to the LCA namespace (Test::format) not the tool-local one + assert_matches!(request.context(), Some(Context::Value(kvs)) if { + let map = &**kvs; + matches!(map.get("input").map(Value::value_kind), Some(ValueKind::Record(ikvs)) if { + let map = &**ikvs; + matches!(map.get("format").map(Value::value_kind), Some(ValueKind::Lit(Literal::EntityUID(eid))) if { + **eid == "Test::format::\"markdown\"".parse().expect("Failed to parse EID") + }) + }) + }); + } + + #[test] + 
fn test_generate_request_dedup_enum_non_deduped_stays_local() { + // tool_a and tool_b share `format` ["markdown", "text"] (deduplicated to LCA). + // tool_c has `format` ["json", "xml"] (not deduplicated — stays local). + // Verify that tool_c's enum resolves to its local namespace. + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["markdown", "text"] + } + }, + "required": ["format"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["markdown", "text"] + } + }, + "required": ["format"] + } + } + }, + { + "name": "tool_c", + "description": "Tool C", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "format": { + "type": "string", + "enum": ["json", "xml"] + } + }, + "required": ["format"] + } + } + } + ] + } + }"#; + + let config = SchemaGeneratorConfig::default().deduplicate_entity_types(true); + let mut schema_generator = get_schema_generator(config); + let description = + ServerDescription::from_json_str(tools_json).expect("Failed to parse tools JSON"); + schema_generator + .add_actions_from_server_description(&description) + .expect("Failed to add server description"); + + let request_generator = schema_generator + .new_request_generator() + .expect("Failed to create request generator"); + + let input = Input::from_json_str( + r#"{ + "params": { + "tool": "tool_c", + "args": { "format": "json" } + } + }"#, + ) + .expect("Failed to parse input"); + + let principal = r#"Test::user::"""#.parse::().unwrap(); + let resource = r#"Test::resource::"""#.parse::().unwrap(); + + let (request, _entities) = request_generator + .generate_request( + principal, + resource, + Context::empty(), + Entities::new(), + &input, + None, + ) + .expect("Failed to generate 
request"); + + // tool_c's format should resolve locally, NOT to the LCA + assert_matches!(request.context(), Some(Context::Value(kvs)) if { + let map = &**kvs; + matches!(map.get("input").map(Value::value_kind), Some(ValueKind::Record(ikvs)) if { + let map = &**ikvs; + matches!(map.get("format").map(Value::value_kind), Some(ValueKind::Lit(Literal::EntityUID(eid))) if { + **eid == "Test::tool_c::Input::format::\"json\"".parse().expect("Failed to parse EID") + }) + }) + }); + } } diff --git a/rust/cedar-policy-mcp-schema-generator/src/generator/schema.rs b/rust/cedar-policy-mcp-schema-generator/src/generator/schema.rs index e80e664..307df3e 100644 --- a/rust/cedar-policy-mcp-schema-generator/src/generator/schema.rs +++ b/rust/cedar-policy-mcp-schema-generator/src/generator/schema.rs @@ -17,7 +17,7 @@ use super::identifiers; use crate::{RequestGenerator, SchemaGeneratorError}; -use cedar_policy_core::ast::{InternalName, Name, UnreservedId}; +use cedar_policy_core::ast::{Id, InternalName, Name, UnreservedId}; use cedar_policy_core::est::Annotations; use cedar_policy_core::validator::{ json_schema::{ @@ -33,7 +33,7 @@ use nonempty::NonEmpty; use smol_str::{SmolStr, ToSmolStr}; -use std::collections::{btree_map::Entry, BTreeMap, HashMap}; +use std::collections::{btree_map::Entry, BTreeMap, HashMap, HashSet}; /// A type reserved to configure how the schema generator functions #[derive(Debug, Clone)] @@ -43,6 +43,7 @@ pub struct SchemaGeneratorConfig { pub(crate) erase_annotations: bool, pub(crate) flatten_namespaces: bool, pub(crate) numbers_as_decimal: bool, + pub(crate) deduplicate_entity_types: bool, } impl SchemaGeneratorConfig { @@ -120,6 +121,21 @@ impl SchemaGeneratorConfig { ..self } } + + /// Updates config to set `deduplicate_entity_types` to `val` (default: false) + /// + /// If `deduplicate_entity_types` is set to `true`, then entity types with + /// equivalent definitions across multiple tools will be consolidated into + /// a single entity type placed in 
the lowest common ancestor namespace. + /// + /// Currently supports enum entity types (matched by name + variant values). + /// Future versions may extend to structural entity types. + pub fn deduplicate_entity_types(self, val: bool) -> Self { + Self { + deduplicate_entity_types: val, + ..self + } + } } impl Default for SchemaGeneratorConfig { @@ -130,10 +146,136 @@ impl Default for SchemaGeneratorConfig { erase_annotations: true, flatten_namespaces: false, numbers_as_decimal: false, + deduplicate_entity_types: false, } } } +/// A fingerprint that uniquely identifies an entity type's definition. +/// Two entity types are considered equivalent (and thus deduplication candidates) +/// if and only if they produce the same fingerprint. +/// +/// Designed as an enum to support future extension to other entity type kinds. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub(crate) enum EntityTypeFingerprint { + /// Fingerprint for enum entity types: matched by name + ordered variant values. + Enum { + base_name: UnreservedId, + variants: Vec, + }, +} + +impl EntityTypeFingerprint { + pub(crate) fn base_name(&self) -> &UnreservedId { + match self { + Self::Enum { base_name, .. } => base_name, + } + } +} + +/// The resolved placement for a deduplicated entity type. +#[derive(Debug, Clone)] +pub(crate) struct DeduplicatedEntityType { + /// The LCA namespace where the shared entity type will be placed + pub(crate) lca_namespace: Option, + /// The original namespaces where this entity type appeared before dedup + pub(crate) source_namespaces: Vec>, +} + +/// Tracks all entity type occurrences and computes deduplication decisions. 
+#[derive(Debug, Clone, Default)] +struct DeduplicationMap { + /// Maps each unique fingerprint to the namespaces where it was seen (one entry per recorded occurrence; repeats are kept) + occurrences: HashMap>>, +} + +impl DeduplicationMap { + /// Append one occurrence of `fingerprint` in `namespace`; nothing is de-duplicated here, so the same namespace may be pushed more than once. + fn record(&mut self, fingerprint: EntityTypeFingerprint, namespace: Option) { + self.occurrences + .entry(fingerprint) + .or_default() + .push(namespace); + } + + /// Resolve all duplicates: returns a map from fingerprint to placement info + /// for entity types that were recorded more than once (occurrences are counted exactly as pushed by `record`, without collapsing repeated namespaces). + fn resolve_duplicates(&self) -> HashMap { + self.occurrences + .iter() + .filter(|(_, namespaces)| namespaces.len() > 1) + .map(|(fp, namespaces)| { + let lca = compute_lca(namespaces); + ( + fp.clone(), + DeduplicatedEntityType { + lca_namespace: lca, + source_namespaces: namespaces.clone(), + }, + ) + }) + .collect() + } +} + +/// Compute the lowest common ancestor namespace of a set of namespaces. +/// +/// Namespaces are hierarchical (e.g., `MyMcpServer::tool_a::Input`). +/// The LCA is the longest common prefix of all namespace paths. +/// +/// Examples: +/// - LCA of `A::B::C` and `A::B::D` is `A::B` +/// - LCA of `A::B::C` and `A::D::E` is `A` +/// - LCA of `A::B` and `A::B` is `A::B` +/// +/// Returns `None` (global namespace) when the input is empty, when any input is the root namespace (`None`), when the paths share no leading segment, or when the shared prefix cannot be converted back into a `Name`. 
+fn compute_lca(namespaces: &[Option]) -> Option { + // Empty list → None + if namespaces.is_empty() { + return None; + } + + // Collect all Some values; if any namespace is None (global), the LCA is None + let names: Vec<&Name> = namespaces + .iter() + .map(|ns| ns.as_ref()) + .collect::>>()?; + + // Get path segments (namespace components + basename) as Vec<&Id> for each name + let segment_lists: Vec> = names + .iter() + .map(|name| { + let internal: &InternalName = name.as_ref(); + internal + .namespace_components() + .chain(std::iter::once(internal.basename())) + .collect() + }) + .collect(); + + // Count how many leading segments of the first path are shared by every path; stop at the first position where any path differs or is too short + let first = segment_lists.first()?; + let mut prefix_len = 0; + + for (i, segment) in first.iter().enumerate() { + if segment_lists + .iter() + .all(|segs: &Vec<&Id>| segs.get(i) == Some(segment)) + { + prefix_len = i + 1; + } else { + break; + } + } + + // No shared leading segment, so there is no common ancestor to name + if prefix_len == 0 { + return None; + } + + // Rebuild a Name from the shared prefix: its last segment becomes the basename, the earlier ones the namespace path + let prefix = first.get(..prefix_len)?; + let basename = (*prefix.last()?).clone(); + let path = prefix.get(..prefix.len() - 1)?.iter().map(|&id| id.clone()); + Name::try_from(InternalName::new(basename, path, None)).ok() +} + /// A type that allows constructing a Cedar Schema (Fragment) /// from an input Cedar Schema Stub that defines the Cedar Type of /// MCP principals, MCP Resources, and common MCP Contects. @@ -151,6 +293,10 @@ pub struct SchemaGenerator { actions: Option>>, config: SchemaGeneratorConfig, tools: ServerDescription, + /// Resolved deduplication decisions, populated during pass 1 + /// (only when deduplicate_entity_types is true). + /// Maps fingerprint → placement info for entity types that appear in multiple tools. 
+ resolved_dedup: Option>, } impl SchemaGenerator { @@ -290,6 +436,7 @@ impl SchemaGenerator { actions, config, tools: ServerDescription::new(Vec::new().into_iter(), HashMap::new()), + resolved_dedup: None, }) } @@ -314,9 +461,155 @@ impl SchemaGenerator { self.tools.clone(), self.namespace.clone(), schema, + self.resolved_dedup.clone(), )) } + /// Check if a fingerprint matches an existing entity type definition. For enum fingerprints only the variant list is compared, element by element and in order; `base_name` is ignored here, and an entity of any non-enum kind never matches. + fn fingerprint_matches_entity( + fingerprint: &EntityTypeFingerprint, + entity: &EntityType, + ) -> bool { + match fingerprint { + EntityTypeFingerprint::Enum { + base_name: _, + variants, + } => match &entity.kind { + EntityTypeKind::Enum { choices } => { + let existing: Vec<&SmolStr> = choices.iter().collect(); + let candidate: Vec<&SmolStr> = variants.iter().collect(); + existing == candidate + } + _ => false, + }, + } + } + + /// Look up whether a given fingerprint was deduplicated. + /// Returns the LCA namespace if so; the outer `None` means no dedup map is present or the fingerprint is not in it, while the inner `Option` is the stored `lca_namespace` itself. + fn get_dedup_namespace(&self, fingerprint: &EntityTypeFingerprint) -> Option<&Option> { + self.resolved_dedup + .as_ref() + .and_then(|map| map.get(fingerprint)) + .map(|d| &d.lca_namespace) + } + + /// Recursively scan parameters for enum properties and record their fingerprints. + /// Recurses into nested objects to find enums at any depth. + #[expect( + clippy::ref_option, + reason = "Consistent with the rest of the codebase's namespace parameter style." 
+    )]
+    fn collect_enum_fingerprints(
+        parameters: &Parameters,
+        namespace: &Option,
+        dedup_map: &mut DeduplicationMap,
+    ) {
+        // Every top-level property is scanned under `namespace`; nesting is handled by
+        // `collect_enum_fingerprints_from_property_type`.
+        for property in parameters.properties() {
+            Self::collect_enum_fingerprints_from_property_type(
+                property.name(),
+                property.property_type(),
+                namespace,
+                dedup_map,
+            );
+        }
+        // Type definitions declared inside the parameters are scanned the same way,
+        // under the same namespace as the properties.
+        for type_def in parameters.type_definitions() {
+            Self::collect_enum_fingerprints_from_property_type(
+                type_def.name(),
+                type_def.property_type(),
+                namespace,
+                dedup_map,
+            );
+        }
+    }
+
+    /// Recursively scan a single property type for enum occurrences.
+    ///
+    /// * `Enum`: records a fingerprint (`name` + variants) under `namespace`. Enums with
+    ///   no variants, or whose `name` does not parse, are silently skipped.
+    /// * `Object`: recurses into each property under the child namespace formed by
+    ///   qualifying `name` with `namespace`; `additional_properties` is scanned under
+    ///   the name `{name}Tag`.
+    /// * `Array`: recurses into the element type with the same `name` and `namespace`.
+    /// * `Union` / `Tuple`: recurse into each member under the child namespace, using
+    ///   the names `TypeChoice{i}` / `Proj{i}`.
+    /// * Any other property type is ignored.
+    ///
+    /// NOTE(review): the derived names (`Tag`, `TypeChoice{i}`, `Proj{i}`) presumably
+    /// mirror the names used when the schema types are generated — confirm.
+    /// NOTE(review): the type arguments of `name.parse::()` and `&Option` appear to be
+    /// missing in this listing; restore them before compiling.
+    #[expect(
+        clippy::ref_option,
+        reason = "Consistent with the rest of the codebase's namespace parameter style."
+    )]
+    fn collect_enum_fingerprints_from_property_type(
+        name: &str,
+        property_type: &PropertyType,
+        namespace: &Option,
+        dedup_map: &mut DeduplicationMap,
+    ) {
+        match property_type {
+            PropertyType::Enum { variants } => {
+                // Empty enums are never recorded, so every recorded fingerprint has at
+                // least one variant.
+                if !variants.is_empty() {
+                    if let Ok(base_name) = name.parse::() {
+                        let fingerprint = EntityTypeFingerprint::Enum {
+                            base_name,
+                            variants: variants.clone(),
+                        };
+                        dedup_map.record(fingerprint, namespace.clone());
+                    }
+                }
+            }
+            PropertyType::Object {
+                properties,
+                additional_properties,
+            } => {
+                // An object whose name does not parse is skipped together with
+                // everything nested inside it.
+                if let Ok(obj_name) = name.parse::() {
+                    let child_ns = Some(obj_name.qualify_with_name(namespace.as_ref()));
+                    for prop in properties {
+                        Self::collect_enum_fingerprints_from_property_type(
+                            prop.name(),
+                            prop.property_type(),
+                            &child_ns,
+                            dedup_map,
+                        );
+                    }
+                    if let Some(additional) = additional_properties {
+                        let tag_name = format!("{name}Tag");
+                        Self::collect_enum_fingerprints_from_property_type(
+                            &tag_name,
+                            additional.as_ref(),
+                            &child_ns,
+                            dedup_map,
+                        );
+                    }
+                }
+            }
+            PropertyType::Array { element_ty } => {
+                // Arrays add no namespace level: the element is scanned under the
+                // array's own name and namespace.
+                Self::collect_enum_fingerprints_from_property_type(
+                    name,
+                    element_ty.as_ref(),
+                    namespace,
+                    dedup_map,
+                );
+            }
+            PropertyType::Union { types } => {
+                if let Ok(union_name) = name.parse::() {
+                    let child_ns = Some(union_name.qualify_with_name(namespace.as_ref()));
+                    for (i, ty) in types.iter().enumerate() {
+                        let variant_name = format!("TypeChoice{i}");
+                        Self::collect_enum_fingerprints_from_property_type(
+                            &variant_name,
+                            ty,
+                            &child_ns,
+                            dedup_map,
+                        );
+                    }
+                }
+            }
+            PropertyType::Tuple { types } => {
+                if let Ok(tuple_name) = name.parse::() {
+                    let child_ns = Some(tuple_name.qualify_with_name(namespace.as_ref()));
+                    for (i, ty) in types.iter().enumerate() {
+                        let proj_name = format!("Proj{i}");
+                        Self::collect_enum_fingerprints_from_property_type(
+                            &proj_name, ty, &child_ns, dedup_map,
+                        );
+                    }
+                }
+            }
+            // Remaining property types cannot contain enum entity types at this level.
+            _ => {}
+        }
+    }
+
     /// Add a new action to the generated Cedar Schema
     /// that corresponds to the input `ToolDescription`
     pub fn add_action_from_tool_description(
@@ -389,12 +682,139 @@ impl SchemaGenerator {
             self.add_commontype(&namespace, ty, ty_name, true)?;
         }

+        self.deduplicate_entities(description)?;
+
         for tool_description in description.tool_descriptions() {
             self.add_action_from_tool_description_inner(tool_description, common_types.clone())?
         }
         Ok(())
     }

+    /// Scans all tool descriptions for equivalent enum entity types and places
+    /// shared definitions in the lowest common ancestor namespace.
+    /// Must be called before individual tool actions are processed.
+    ///
+    /// Does nothing unless `deduplicate_entity_types` is enabled in the configuration.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if a tool name cannot be parsed as a `Name`, if a shared enum
+    /// has no variants, or if `add_entitytype` rejects a shared definition.
+    fn deduplicate_entities(
+        &mut self,
+        description: &ServerDescription,
+    ) -> Result<(), SchemaGeneratorError> {
+        if !self.config.deduplicate_entity_types {
+            return Ok(());
+        }
+
+        // Step 1: record every enum occurrence together with the namespace it would
+        // otherwise be generated in.
+        let mut dedup_map = DeduplicationMap::default();
+        for tool_description in description.tool_descriptions() {
+            let tool_ns: Name = tool_description.name().parse()?;
+            let tool_ns = tool_ns.qualify_with_name(self.namespace.as_ref());
+
+            let input_ns = Some(identifiers::INPUT_NAME.qualify_with_name(Some(&tool_ns)));
+            Self::collect_enum_fingerprints(tool_description.inputs(), &input_ns, &mut dedup_map);
+
+            // Output enums only take part when outputs are included in the schema.
+            if self.config.include_outputs {
+                let output_ns = Some(identifiers::OUTPUT_NAME.qualify_with_name(Some(&tool_ns)));
+                Self::collect_enum_fingerprints(
+                    tool_description.outputs(),
+                    &output_ns,
+                    &mut dedup_map,
+                );
+            }
+
+            // Tool-level type definitions are recorded under the tool namespace itself;
+            // only definitions that are directly an enum are considered here.
+            for type_def in tool_description.type_definitions() {
+                let PropertyType::Enum { variants } = type_def.property_type() else {
+                    continue;
+                };
+                if variants.is_empty() {
+                    continue;
+                }
+                let Ok(base_name) = type_def.name().parse::<UnreservedId>() else {
+                    continue;
+                };
+                let fingerprint = EntityTypeFingerprint::Enum {
+                    base_name,
+                    variants: variants.clone(),
+                };
+                dedup_map.record(fingerprint, Some(tool_ns.clone()));
+            }
+        }
+
+        let resolved = dedup_map.resolve_duplicates();
+
+        // Step 2: determine which fingerprints to skip:
+        // - Same base_name targeting the same LCA (different variants conflict)
+        // - Base_name already exists in the LCA namespace
+        //
+        // NOTE(review): only fingerprints returned by `resolve_duplicates` take part in
+        // this clash detection; whether an enum of the same name that is generated later
+        // directly inside an LCA namespace can still clash is not visible here — confirm.
+        let mut skipped = HashSet::<&EntityTypeFingerprint>::new();
+
+        // Group by (base_name, lca_namespace) to detect same-name conflicts; every member
+        // of a group with more than one fingerprint is skipped.
+        let mut lca_groups: HashMap<(&UnreservedId, &Option<Name>), Vec<&EntityTypeFingerprint>> =
+            HashMap::new();
+        for (fp, info) in &resolved {
+            lca_groups
+                .entry((fp.base_name(), &info.lca_namespace))
+                .or_default()
+                .push(fp);
+        }
+        for fps in lca_groups.values().filter(|fps| fps.len() > 1) {
+            skipped.extend(fps.iter().copied());
+        }
+
+        // Skip fingerprints whose base_name collides with a *different* type in the LCA.
+        // If the LCA already has an identical enum (same name + same variants), we reuse it.
+        let mut reused = HashSet::<&EntityTypeFingerprint>::new();
+        for (fp, info) in &resolved {
+            if skipped.contains(fp) {
+                continue;
+            }
+            // A namespace that does not exist yet cannot contain a colliding definition.
+            let Some(nsdef) = self.fragment.0.get(&info.lca_namespace) else {
+                continue;
+            };
+            let base_name = fp.base_name();
+            if nsdef.common_types.keys().any(|k| k.as_ref() == base_name) {
+                skipped.insert(fp);
+            } else if let Some(existing_entity) = nsdef.entity_types.get(base_name) {
+                if Self::fingerprint_matches_entity(fp, existing_entity) {
+                    reused.insert(fp);
+                } else {
+                    skipped.insert(fp);
+                }
+            }
+        }
+
+        // Step 3: place non-skipped entity types in their LCA namespace.
+        // Reused types already exist — record them in `placed` without re-inserting.
+        let mut placed = HashMap::new();
+        for (fingerprint, dedup_info) in &resolved {
+            if skipped.contains(fingerprint) {
+                continue;
+            }
+            let lca_ns = &dedup_info.lca_namespace;
+
+            if !reused.contains(fingerprint) {
+                self.add_namespace(lca_ns.clone());
+
+                match fingerprint {
+                    EntityTypeFingerprint::Enum {
+                        base_name,
+                        variants,
+                    } => {
+                        // Empty enums are never recorded, so this is not expected to
+                        // fail; report it the same way the per-tool enum generation
+                        // does instead of panicking.
+                        let choices = NonEmpty::from_slice(variants).ok_or_else(|| {
+                            SchemaGeneratorError::empty_enum_choice(base_name.to_string())
+                        })?;
+                        let ty = EntityType {
+                            kind: EntityTypeKind::Enum { choices },
+                            annotations: Annotations::new(),
+                            loc: None,
+                        };
+                        self.add_entitytype(lca_ns, ty, base_name.clone(), true)?;
+                    }
+                }
+            }
+            placed.insert(fingerprint.clone(), dedup_info.clone());
+        }
+
+        self.resolved_dedup = Some(placed);
+        Ok(())
+    }
+
     fn add_action_from_tool_description_inner(
         &mut self,
         description: &ToolDescription,
@@ -901,16 +1321,32 @@ impl SchemaGenerator {
             PropertyType::Enum { variants } => {
                 let choices = NonEmpty::from_slice(variants)
                     .ok_or_else(|| SchemaGeneratorError::empty_enum_choice(ty_name.to_string()))?;
-                let ty = EntityType {
-                    kind: EntityTypeKind::Enum { choices },
-                    annotations: Annotations::new(),
-                    loc: None,
+
// Check if this enum was deduplicated (placed in LCA namespace during Pass 1) + let fingerprint = EntityTypeFingerprint::Enum { + base_name: ty_name.clone(), + variants: variants.clone(), }; - self.add_entitytype(namespace, ty, ty_name.clone(), true)?; - let name = RawName::new_from_unreserved(ty_name, None); - let name = RawName::from_name(name.qualify_with_name(namespace.as_ref())); - TypeVariant::Entity { - name: self.flatten_rawname(name), + if let Some(lca_ns) = self.get_dedup_namespace(&fingerprint) { + // Reference the shared type in the LCA namespace (already placed in Pass 1) + let name = RawName::new_from_unreserved(ty_name, None); + let name = RawName::from_name(name.qualify_with_name(lca_ns.as_ref())); + TypeVariant::Entity { + name: self.flatten_rawname(name), + } + } else { + // Original behavior: place locally + let ty = EntityType { + kind: EntityTypeKind::Enum { choices }, + annotations: Annotations::new(), + loc: None, + }; + self.add_entitytype(namespace, ty, ty_name.clone(), true)?; + let name = RawName::new_from_unreserved(ty_name, None); + let name = RawName::from_name(name.qualify_with_name(namespace.as_ref())); + TypeVariant::Entity { + name: self.flatten_rawname(name), + } } } PropertyType::Array { element_ty } => { diff --git a/rust/cedar-policy-mcp-schema-generator/tests/integration.rs b/rust/cedar-policy-mcp-schema-generator/tests/integration.rs index 643093d..0551ba7 100644 --- a/rust/cedar-policy-mcp-schema-generator/tests/integration.rs +++ b/rust/cedar-policy-mcp-schema-generator/tests/integration.rs @@ -60,6 +60,65 @@ mod lib { ); } + fn run_inline_test(tools_json: &str, expected_schema: &str, config: SchemaGeneratorConfig) { + let description = + ServerDescription::from_json_str(tools_json).expect("Failed to parse tools JSON"); + let stub_file = + std::fs::File::open("examples/stub.cedarschema").expect("Failed to read schema file"); + let input_schema = Fragment::from_cedarschema_file(stub_file, Extensions::all_available()) + 
.expect("Failed to parse input schema") + .0; + + let mut generator = SchemaGenerator::new_with_config(input_schema, config) + .expect("input schema file is malformed"); + generator + .add_actions_from_server_description(&description) + .expect("Failed to add tool actions to schema generator"); + + let actual_schema = generator + .get_schema() + .clone() + .to_cedarschema() + .expect("Failed to resolve generated schema"); + assert!( + expected_schema == actual_schema, + "{} != {}", + expected_schema, + actual_schema + ); + } + + fn run_inline_test_with_stub( + tools_json: &str, + stub_schema: &str, + expected_schema: &str, + config: SchemaGeneratorConfig, + ) { + let description = + ServerDescription::from_json_str(tools_json).expect("Failed to parse tools JSON"); + let extensions = Extensions::all_available(); + let (input_schema, _) = Fragment::from_cedarschema_str(stub_schema, extensions) + .expect("Failed to parse custom stub schema"); + + let mut generator = SchemaGenerator::new_with_config(input_schema, config) + .expect("input schema file is malformed"); + generator + .add_actions_from_server_description(&description) + .expect("Failed to add tool actions to schema generator"); + + let actual_schema = generator + .get_schema() + .clone() + .to_cedarschema() + .expect("Failed to resolve generated schema"); + assert!( + expected_schema == actual_schema, + "{} != {}", + expected_schema, + actual_schema + ); + } + #[test] fn strands_agent() { run_integration_test( @@ -98,6 +157,1019 @@ mod lib { SchemaGeneratorConfig::default(), ); } + + #[test] + fn dedup_entity_types() { + run_integration_test( + "examples/dedup/dedup_tools.json", + "examples/dedup/dedup_tools.cedarschema", + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_entity_types_flat() { + run_integration_test( + "examples/dedup/dedup_tools.json", + "examples/dedup/dedup_tools_flat.cedarschema", + SchemaGeneratorConfig::default() + 
.deduplicate_entity_types(true)
+                .flatten_namespaces(true),
+        );
+    }
+
+    #[test]
+    fn dedup_same_name_different_variants() {
+        // Two tools have an enum with the same name ("mode") but different variants.
+        // They should NOT be deduplicated — each stays in its own Input namespace.
+        run_integration_test(
+            "examples/dedup/dedup_same_name_different_variants.json",
+            "examples/dedup/dedup_same_name_different_variants.cedarschema",
+            SchemaGeneratorConfig::default().deduplicate_entity_types(true),
+        );
+    }
+
+    #[test]
+    fn dedup_same_name_different_variants_flat() {
+        // Same tool fixture as `dedup_same_name_different_variants`, run with
+        // `flatten_namespaces` enabled and compared against the `_flat` expected schema.
+        run_integration_test(
+            "examples/dedup/dedup_same_name_different_variants.json",
+            "examples/dedup/dedup_same_name_different_variants_flat.cedarschema",
+            SchemaGeneratorConfig::default()
+                .deduplicate_entity_types(true)
+                .flatten_namespaces(true),
+        );
+    }
+
+    #[test]
+    fn dedup_three_way_lca() {
+        // Single tool with the same enum ("priority") at three different nested object depths:
+        //   MyMcpServer::tool_x::Input::B::C::priority
+        //   MyMcpServer::tool_x::Input::B::D::priority
+        //   MyMcpServer::tool_x::Input::E::F::priority
+        // The enum should be deduplicated to the LCA namespace (MyMcpServer::tool_x::Input).
+        run_integration_test(
+            "examples/dedup/dedup_three_way_lca.json",
+            "examples/dedup/dedup_three_way_lca.cedarschema",
+            SchemaGeneratorConfig::default().deduplicate_entity_types(true),
+        );
+    }
+
+    #[test]
+    fn dedup_three_way_lca_flat() {
+        // Same tool fixture as `dedup_three_way_lca`, run with `flatten_namespaces`
+        // enabled and compared against the `_flat` expected schema.
+        run_integration_test(
+            "examples/dedup/dedup_three_way_lca.json",
+            "examples/dedup/dedup_three_way_lca_flat.cedarschema",
+            SchemaGeneratorConfig::default()
+                .deduplicate_entity_types(true)
+                .flatten_namespaces(true),
+        );
+    }
+
+    #[test]
+    fn dedup_collision_existing_entity_in_lca() {
+        // Two tools share an enum named "McpServer" (same name as existing entity type
+        // in the LCA namespace). Dedup should skip this enum — each tool keeps its own copy.
+ run_integration_test( + "examples/dedup/dedup_collision_existing_entity.json", + "examples/dedup/dedup_collision_existing_entity.cedarschema", + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_collision_same_enum_already_in_lca() { + // The LCA namespace already has an enum entity type with the same name AND same variants. + // Dedup should reuse the existing entity — tools reference it instead of creating local copies. + let stub_schema = r#" +namespace MyMcpServer { + @mcp_principal("User") + entity User { + id: String, + username: String, + }; + + @mcp_context("session") + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr, + }; + + @mcp_resource("McpServer") + entity McpServer; + + @mcp_action("call_tool") + action call_tool; + + // Pre-existing enum entity type with same variants as tools + entity status enum ["active", "inactive"]; +} +"#; + + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A with status enum", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["active", "inactive"], + "description": "Status" + }, + "query": { + "type": "string", + "description": "Query" + } + }, + "required": ["query"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B with same status enum", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["active", "inactive"], + "description": "Status" + }, + "data": { + "type": "string", + "description": "Data" + } + }, + "required": ["data"] + } + } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + query: String, + status?: MyMcpServer::status + }; + + type tool_bInput = { + data: String, + status?: MyMcpServer::status + }; + + entity McpServer; + + entity 
User = { + id: String, + username: String + }; + + entity status enum [\"active\", \"inactive\"]; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} +"; + + run_inline_test_with_stub( + tools_json, + stub_schema, + expected_schema, + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_collision_different_enum_already_in_lca() { + // The LCA namespace already has an enum entity type with the same name but DIFFERENT variants. + // Dedup should skip — each tool keeps its own local copy. + let stub_schema = r#" +namespace MyMcpServer { + @mcp_principal("User") + entity User { + id: String, + username: String, + }; + + @mcp_context("session") + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr, + }; + + @mcp_resource("McpServer") + entity McpServer; + + @mcp_action("call_tool") + action call_tool; + + // Pre-existing enum with DIFFERENT variants than tools + entity status enum ["open", "closed"]; +} +"#; + + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A with status enum", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["active", "inactive"], + "description": "Status" + }, + "query": { + "type": "string", + "description": "Query" + } + }, + "required": ["query"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B with same status enum", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["active", "inactive"], + "description": "Status" + }, + "data": { + "type": "string", + "description": "Data" + 
} + }, + "required": ["data"] + } + } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer::tool_a::Input { + entity status enum [\"active\", \"inactive\"]; +} + +namespace MyMcpServer::tool_b::Input { + entity status enum [\"active\", \"inactive\"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + query: String, + status?: MyMcpServer::tool_a::Input::status + }; + + type tool_bInput = { + data: String, + status?: MyMcpServer::tool_b::Input::status + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity status enum [\"open\", \"closed\"]; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} +"; + + run_inline_test_with_stub( + tools_json, + stub_schema, + expected_schema, + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_skip_both_when_same_name_different_variants_both_duplicated() { + // Two pairs of tools define `mode` with different variants: + // tool_a, tool_b: mode ["fast", "slow"] + // tool_c, tool_d: mode ["sync", "async"] + // Both fingerprints have >1 occurrence, both compute LCA = MyMcpServer. + // Since they'd collide at the same LCA, BOTH should be skipped. 
+ let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A", + "inputSchema": { "json": { "type": "object", "properties": { "mode": { "type": "string", "enum": ["fast", "slow"] } }, "required": ["mode"] } } + }, + { + "name": "tool_b", + "description": "Tool B", + "inputSchema": { "json": { "type": "object", "properties": { "mode": { "type": "string", "enum": ["fast", "slow"] } }, "required": ["mode"] } } + }, + { + "name": "tool_c", + "description": "Tool C", + "inputSchema": { "json": { "type": "object", "properties": { "mode": { "type": "string", "enum": ["sync", "async"] } }, "required": ["mode"] } } + }, + { + "name": "tool_d", + "description": "Tool D", + "inputSchema": { "json": { "type": "object", "properties": { "mode": { "type": "string", "enum": ["sync", "async"] } }, "required": ["mode"] } } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer::tool_a::Input { + entity mode enum [\"fast\", \"slow\"]; +} + +namespace MyMcpServer::tool_b::Input { + entity mode enum [\"fast\", \"slow\"]; +} + +namespace MyMcpServer::tool_c::Input { + entity mode enum [\"sync\", \"async\"]; +} + +namespace MyMcpServer::tool_d::Input { + entity mode enum [\"sync\", \"async\"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + mode: MyMcpServer::tool_a::Input::mode + }; + + type tool_bInput = { + mode: MyMcpServer::tool_b::Input::mode + }; + + type tool_cInput = { + mode: MyMcpServer::tool_c::Input::mode + }; + + type tool_dInput = { + mode: MyMcpServer::tool_d::Input::mode + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], 
+ resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; + + action \"tool_c\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_cInput, + session: CommonContext + } + }; + + action \"tool_d\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_dInput, + session: CommonContext + } + }; +} +"; + + run_inline_test( + tools_json, + expected_schema, + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_same_variants_different_order_not_deduplicated() { + // Two tools have an enum with the same variants but in different order. + // Order is significant, so they should NOT be deduplicated. + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A", + "inputSchema": { "json": { "type": "object", "properties": { "mode": { "type": "string", "enum": ["fast", "slow"] } }, "required": ["mode"] } } + }, + { + "name": "tool_b", + "description": "Tool B", + "inputSchema": { "json": { "type": "object", "properties": { "mode": { "type": "string", "enum": ["slow", "fast"] } }, "required": ["mode"] } } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer::tool_a::Input { + entity mode enum [\"fast\", \"slow\"]; +} + +namespace MyMcpServer::tool_b::Input { + entity mode enum [\"slow\", \"fast\"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + mode: MyMcpServer::tool_a::Input::mode + }; + + type tool_bInput = { + mode: MyMcpServer::tool_b::Input::mode + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + 
action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} +"; + + run_inline_test( + tools_json, + expected_schema, + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_enum_inside_array() { + // Two tools have the same enum nested inside an array property. + // The enum should still be deduplicated. + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "priorities": { + "type": "array", + "items": { + "type": "string", + "enum": ["high", "medium", "low"] + } + } + }, + "required": ["priorities"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "priorities": { + "type": "array", + "items": { + "type": "string", + "enum": ["high", "medium", "low"] + } + } + }, + "required": ["priorities"] + } + } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + priorities: Set<MyMcpServer::priorities> + }; + + type tool_bInput = { + priorities: Set<MyMcpServer::priorities> + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity priorities enum [\"high\", \"medium\", \"low\"]; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} +"; + + run_inline_test( + tools_json, + expected_schema, + SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } + + #[test] + fn 
dedup_enum_across_outputs() { + // Two tools share an enum in their outputSchema. + // With include_outputs + deduplicate_entity_types, the enum should be + // deduplicated to the LCA namespace. + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "query": { "type": "string" } + }, + "required": ["query"] + } + }, + "outputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["success", "failure", "pending"] + } + }, + "required": ["status"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "id": { "type": "string" } + }, + "required": ["id"] + } + }, + "outputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["success", "failure", "pending"] + } + }, + "required": ["status"] + } + } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + query: String + }; + + type tool_aOutput = { + status: MyMcpServer::status + }; + + type tool_bInput = { + id: String + }; + + type tool_bOutput = { + status: MyMcpServer::status + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity status enum [\"success\", \"failure\", \"pending\"]; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + output?: tool_aOutput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + output?: tool_bOutput, + session: CommonContext + } + }; +} +"; + + run_inline_test( + tools_json, + expected_schema, 
+ SchemaGeneratorConfig::default() + .include_outputs(true) + .deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_enum_across_input_and_output() { + // One tool has an enum in inputSchema, the other has the same enum in outputSchema. + // They should still be deduplicated to the LCA. + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A with enum in input", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "priority": { + "type": "string", + "enum": ["high", "medium", "low"] + } + }, + "required": ["priority"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B with same enum in output", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "id": { "type": "string" } + }, + "required": ["id"] + } + }, + "outputSchema": { + "json": { + "type": "object", + "properties": { + "priority": { + "type": "string", + "enum": ["high", "medium", "low"] + } + }, + "required": ["priority"] + } + } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + priority: MyMcpServer::priority + }; + + type tool_aOutput = { }; + + type tool_bInput = { + id: String + }; + + type tool_bOutput = { + priority: MyMcpServer::priority + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + entity priority enum [\"high\", \"medium\", \"low\"]; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + output?: tool_aOutput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + output?: tool_bOutput, + session: CommonContext + } + }; +} +"; + + run_inline_test( + tools_json, + expected_schema, + 
SchemaGeneratorConfig::default() + .include_outputs(true) + .deduplicate_entity_types(true), + ); + } + + #[test] + fn dedup_output_not_scanned_without_include_outputs() { + // When include_outputs is false, output enums should NOT participate in dedup. + // tool_a has "status" in input, tool_b has same "status" in output only. + // Without include_outputs, the output enum is never generated, so no dedup happens. + let tools_json = r#"{ + "result": { + "tools": [ + { + "name": "tool_a", + "description": "Tool A with enum in input", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["success", "failure"] + } + }, + "required": ["status"] + } + } + }, + { + "name": "tool_b", + "description": "Tool B with same enum in output only", + "inputSchema": { + "json": { + "type": "object", + "properties": { + "id": { "type": "string" } + }, + "required": ["id"] + } + }, + "outputSchema": { + "json": { + "type": "object", + "properties": { + "status": { + "type": "string", + "enum": ["success", "failure"] + } + }, + "required": ["status"] + } + } + } + ] + } + }"#; + + let expected_schema = "\ +namespace MyMcpServer::tool_a::Input { + entity status enum [\"success\", \"failure\"]; +} + +namespace MyMcpServer { + type CommonContext = { + currentTimestamp: datetime, + ipaddr: ipaddr + }; + + type tool_aInput = { + status: MyMcpServer::tool_a::Input::status + }; + + type tool_bInput = { + id: String + }; + + entity McpServer; + + entity User = { + id: String, + username: String + }; + + action \"call_tool\"; + + action \"tool_a\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_aInput, + session: CommonContext + } + }; + + action \"tool_b\" in [Action::\"call_tool\"] appliesTo { + principal: [User], + resource: [McpServer], + context: { + input: tool_bInput, + session: CommonContext + } + }; +} +"; + + run_inline_test( + tools_json, + expected_schema, + 
SchemaGeneratorConfig::default().deduplicate_entity_types(true), + ); + } } #[cfg(feature = "cli")]