archestra-ai · joeyorlando · Mar 2, 2026 · Mar 3, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/docs/openapi.json b/docs/openapi.json
diff --git a/docs/pages/platform-access-control.md b/docs/pages/platform-access-control.md
diff --git a/docs/pages/platform-mcp-gateway.md b/docs/pages/platform-mcp-gateway.md
@@ -79,3 +79,16 @@ Archestra's MCP Gateways support three authentication methods:
 - **External Identity Provider (JWKS)** — For MCP clients that authenticate with an external IdP (Keycloak, Okta, Entra ID, Auth0, etc.). The gateway validates JWT bearer tokens directly against the IdP's JWKS endpoint, allowing external users to access MCP tools without an Archestra account. Configure in **Settings → Identity Providers**, then select in the MCP Gateway's **Identity Provider (JWKS Auth)** dropdown.
 
 See [MCP Authentication](/docs/mcp-authentication) for more details.
+
+## MCP Rate Limits
+
+MCP Rate Limits control how frequently tool calls can be made through the MCP Gateway. Limits use a sliding window counter and are scoped per Agent or per MCP Gateway.
+
+### Types
+
+- **Per Server** — limits total calls to any tool on a given MCP server within a time window.
+- **Per Tool** — limits calls to a specific tool on a given MCP server within a time window.
+
+Multiple limits can apply to the same call (e.g., both a server-level and a tool-level limit). All applicable limits are checked, and the first limit found to be exceeded triggers the rejection.
+
+Limits are checked before each tool execution. When a limit is exceeded, the tool call returns an error result with a message including the limit value, window, and approximate retry time.
diff --git a/docs/pages/platform-observability.md b/docs/pages/platform-observability.md
@@ -37,6 +37,7 @@ The endpoint `http://localhost:9050/metrics` exposes Prometheus-formatted metric
 
 - `mcp_tool_calls_total` - Total MCP tool calls by agent_id, agent_name, agent_type, mcp_server_name, tool_name, and status (success/error)
 - `mcp_tool_call_duration_seconds` - MCP tool call execution duration by agent_id, agent_name, agent_type, mcp_server_name, tool_name, and status
+- `mcp_rate_limit_rejections_total` - Total MCP tool calls rejected by [rate limits](/docs/platform-mcp-gateway#mcp-rate-limits), labeled by agent_id, agent_name, mcp_server_name, tool_name, limit_type, and entity_type
 - `mcp_server_deployment_status` - Current deployment state of self-hosted MCP servers by server_name and state (not_created/pending/running/failed/succeeded). Value is 1 for the active state. Use `count(mcp_server_deployment_status{state="running"} == 1)` to count running deployments.
 
 ### Archestra Application Metrics

diff --git a/platform/backend/src/archestra-mcp-server.ts b/platform/backend/src/archestra-mcp-server.ts
@@ -30,8 +30,6 @@ import type { InternalMcpCatalog } from "@/types";
 import {
   AutonomyPolicyOperator,
   type LimitEntityType,
-  type LimitType,
-  LimitTypeSchema,
   type ToolInvocation,
   type TrustedData,
 } from "@/types";
@@ -533,61 +531,29 @@ export async function executeArchestraTool(
 
     try {
       const entityType = args?.entity_type as LimitEntityType;
-
       const entityId = args?.entity_id as string;
-      const limitType = args?.limit_type as LimitType;
       const limitValue = args?.limit_value as number;
       const model = args?.model as string[] | undefined;
-      const mcpServerName = args?.mcp_server_name as string | undefined;
-      const toolName = args?.tool_name as string | undefined;
 
       // Validate required fields
-      if (!entityType || !entityId || !limitType || limitValue === undefined) {
-        return {
-          content: [
-            {
-              type: "text",
-              text: "Error: entity_type, entity_id, limit_type, and limit_value are required fields.",
-            },
-          ],
-          isError: true,
-        };
-      }
-
-      // Validate limit type specific requirements
-      if (
-        limitType === "token_cost" &&
-        (!model || !Array.isArray(model) || model.length === 0)
-      ) {
-        return {
-          content: [
-            {
-              type: "text",
-              text: "Error: model array with at least one model is required for token_cost limits.",
-            },
-          ],
-          isError: true,
-        };
-      }
-
-      if (limitType === "mcp_server_calls" && !mcpServerName) {
+      if (!entityType || !entityId || limitValue === undefined) {
         return {
           content: [
             {
               type: "text",
-              text: "Error: mcp_server_name is required for mcp_server_calls limits.",
+              text: "Error: entity_type, entity_id, and limit_value are required fields.",
             },
           ],
           isError: true,
         };
       }
 
-      if (limitType === "tool_calls" && (!mcpServerName || !toolName)) {
+      if (!model || !Array.isArray(model) || model.length === 0) {
         return {
           content: [
             {
               type: "text",
-              text: "Error: mcp_server_name and tool_name are required for tool_calls limits.",
+              text: "Error: model array with at least one model is required.",
             },
           ],
           isError: true,
@@ -598,26 +564,15 @@ export async function executeArchestraTool(
       const limit = await LimitModel.create({
         entityType,
         entityId,
-        limitType,
         limitValue,
         model,
-        mcpServerName,
-        toolName,
       });
 
       return {
         content: [
           {
             type: "text",
-            text: `Successfully created limit.\n\nLimit ID: ${
-              limit.id
-            }\nEntity Type: ${limit.entityType}\nEntity ID: ${
-              limit.entityId
-            }\nLimit Type: ${limit.limitType}\nLimit Value: ${
-              limit.limitValue
-            }${limit.model ? `\nModel: ${limit.model}` : ""}${
-              limit.mcpServerName ? `\nMCP Server: ${limit.mcpServerName}` : ""
-            }${limit.toolName ? `\nTool: ${limit.toolName}` : ""}`,
+            text: `Successfully created limit.\n\nLimit ID: ${limit.id}\nEntity Type: ${limit.entityType}\nEntity ID: ${limit.entityId}\nLimit Value: ${limit.limitValue}${limit.model ? `\nModel: ${limit.model}` : ""}`,
           },
         ],
         isError: false,
@@ -673,7 +628,6 @@ export async function executeArchestraTool(
           let result = `**Limit ID:** ${limit.id}`;
           result += `\n  Entity Type: ${limit.entityType}`;
           result += `\n  Entity ID: ${limit.entityId}`;
-          result += `\n  Limit Type: ${limit.limitType}`;
           result += `\n  Limit Value: ${limit.limitValue}`;
           if (limit.model) result += `\n  Model: ${limit.model}`;
           if (limit.mcpServerName)
@@ -767,7 +721,7 @@ export async function executeArchestraTool(
         content: [
           {
             type: "text",
-            text: `Successfully updated limit.\n\nLimit ID: ${limit.id}\nEntity Type: ${limit.entityType}\nEntity ID: ${limit.entityId}\nLimit Type: ${limit.limitType}\nLimit Value: ${limit.limitValue}`,
+            text: `Successfully updated limit.\n\nLimit ID: ${limit.id}\nEntity Type: ${limit.entityType}\nEntity ID: ${limit.entityId}\nLimit Value: ${limit.limitValue}`,
           },
         ],
         isError: false,
@@ -2047,66 +2001,48 @@ export function getArchestraMcpTools(): Tool[] {
     },
     {
       name: TOOL_CREATE_LIMIT_FULL_NAME,
-      title: "Create Limit",
-      description:
-        "Create a new cost or usage limit for an organization, team, agent, LLM proxy, or MCP gateway. Supports token_cost, mcp_server_calls, and tool_calls limit types.",
+      title: "Create LLM Token Limit",
+      description: "Create a new token cost limit for an organization or team.",
       inputSchema: {
         type: "object",
         properties: {
           entity_type: {
             type: "string",
-            enum: ["organization", "team", "agent", "llm_proxy", "mcp_gateway"],
+            enum: ["organization", "team"],
             description: "The type of entity to apply the limit to",
           },
           entity_id: {
             type: "string",
-            description:
-              "The ID of the entity (organization, team, agent, LLM proxy, or MCP gateway)",
-          },
-          limit_type: {
-            type: "string",
-            enum: LimitTypeSchema.options,
-            description: "The type of limit to apply",
+            description: "The ID of the organization or team",
           },
           limit_value: {
             type: "number",
-            description:
-              "The limit value (tokens or count depending on limit type)",
+            description: "The token cost limit value in dollars",
           },
           model: {
             type: "array",
             items: {
               type: "string",
             },
-            description:
-              "Array of model names (required for token_cost limits)",
-          },
-          mcp_server_name: {
-            type: "string",
-            description:
-              "MCP server name (required for mcp_server_calls and tool_calls limits)",
-          },
-          tool_name: {
-            type: "string",
-            description: "Tool name (required for tool_calls limits)",
+            description: "Array of model names the limit applies to",
           },
         },
-        required: ["entity_type", "entity_id", "limit_type", "limit_value"],
+        required: ["entity_type", "entity_id", "limit_value", "model"],
       },
       annotations: {},
       _meta: {},
     },
     {
       name: TOOL_GET_LIMITS_FULL_NAME,
-      title: "Get Limits",
+      title: "Get LLM Token Limits",
       description:
         "Retrieve all limits, optionally filtered by entity type and/or entity ID.",
       inputSchema: {
         type: "object",
         properties: {
           entity_type: {
             type: "string",
-            enum: ["organization", "team", "agent", "llm_proxy", "mcp_gateway"],
+            enum: ["organization", "team"],
             description: "Optional filter by entity type",
           },
           entity_id: {
@@ -2121,7 +2057,7 @@ export function getArchestraMcpTools(): Tool[] {
     },
     {
       name: TOOL_UPDATE_LIMIT_FULL_NAME,
-      title: "Update Limit",
+      title: "Update LLM Token Limit",
       description: "Update an existing limit's value.",
       inputSchema: {
         type: "object",
@@ -2142,7 +2078,7 @@ export function getArchestraMcpTools(): Tool[] {
     },
     {
       name: TOOL_DELETE_LIMIT_FULL_NAME,
-      title: "Delete Limit",
+      title: "Delete LLM Token Limit",
       description: "Delete an existing limit by ID.",
       inputSchema: {
         type: "object",

diff --git a/platform/backend/src/cache-manager.ts b/platform/backend/src/cache-manager.ts
@@ -36,6 +36,8 @@ export const CacheKey = {
   SlackUserEmail: "slack-user-email",
   /** Virtual API key brute-force rate limiting per IP */
   VirtualKeyRateLimit: "virtual-key-rate-limit",
+  /** MCP rate limiting per limit ID */
+  McpRateLimit: "mcp-rate-limit",
 } as const;
 
 export type CacheKeyPrefix = (typeof CacheKey)[keyof typeof CacheKey];

diff --git a/platform/backend/src/clients/mcp-client.ts b/platform/backend/src/clients/mcp-client.ts
@@ -34,6 +34,7 @@ import type {
 import { deriveAuthMethod } from "@/utils/auth-method";
 import { previewToolResultContent } from "@/utils/tool-result-preview";
 import { K8sAttachTransport } from "./k8s-attach-transport";
+import { checkMcpRateLimits } from "./mcp-rate-limit";
 
 /**
  * Thrown when a stored HTTP session ID is no longer valid (e.g. pod restarted).
@@ -243,6 +244,23 @@ class McpClient {
       return targetMcpServerIdResult.error;
     }
     const { targetMcpServerId, mcpServerName } = targetMcpServerIdResult;
+
+    // Check MCP rate limits before execution
+    const rateLimitError = await checkMcpRateLimits({
+      agentId,
+      mcpServerName,
+      toolName: toolCall.name,
+    });
+    if (rateLimitError) {
+      return await this.createErrorResult(
+        toolCall,
+        agentId,
+        rateLimitError,
+        mcpServerName,
+        authInfo,
+      );
+    }
+
     const secretsResult = await this.getSecretsForMcpServer({
       targetMcpServerId: targetMcpServerId,
       toolCall,