Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .changeset/ai-chat-mcp-timeout-sse-interop.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
'@giantswarm/backstage-plugin-ai-chat-backend': patch
'@giantswarm/backstage-plugin-muster-backend': patch
'@giantswarm/backstage-plugin-gs-node': patch
---

Fix AI chat hanging forever when an MCP server is slow or its responses are dropped by the transport.

- MCP servers are now connected in parallel and each connection/tool-load is bounded by a timeout (15s default, configurable per server via `aiChat.mcp[].timeoutMs`). A hanging server is reported as failed and the chat continues with the remaining servers' tools.
- Patch `@ai-sdk/mcp` to treat SSE events without an explicit `event:` field as `message` events, per the SSE specification. MCP servers behind agentgateway emit bare `data:` frames, which the unpatched client silently dropped — leaving the request promise pending forever and hanging the whole chat request.
62 changes: 62 additions & 0 deletions .yarn/patches/@ai-sdk-mcp-npm-1.0.46-b48c61b836.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
diff --git a/dist/index.js b/dist/index.js
index 973f16ed6bbe752f46dc417dd4be08c78526fcfb..6a6ef2412c0044ffd898110254897626857c6238 100644
--- a/dist/index.js
+++ b/dist/index.js
@@ -1222,7 +1222,7 @@ var SseMCPTransport = class {
}
this.connected = true;
resolve();
- } else if (event === "message") {
+ } else if ((event === void 0 || event === "message")) {
try {
const message = await parseJSONRPCMessage(data);
(_a4 = this.onmessage) == null ? void 0 : _a4.call(this, message);
@@ -1497,7 +1497,7 @@ var HttpMCPTransport = class {
const { done, value } = await reader.read();
if (done) return;
const { event, data } = value;
- if (event === "message") {
+ if ((event === void 0 || event === "message")) {
try {
const msg = await parseJSONRPCMessage(data);
(_a4 = this.onmessage) == null ? void 0 : _a4.call(this, msg);
@@ -1624,7 +1624,7 @@ var HttpMCPTransport = class {
if (id) {
this.lastInboundEventId = id;
}
- if (event === "message") {
+ if ((event === void 0 || event === "message")) {
try {
const msg = await parseJSONRPCMessage(data);
(_a4 = this.onmessage) == null ? void 0 : _a4.call(this, msg);
diff --git a/dist/index.mjs b/dist/index.mjs
index 2b80c31413f15377f1bfa76a520a914655d720d0..7298ad5e561e1847047afda222221f216144d57a 100644
--- a/dist/index.mjs
+++ b/dist/index.mjs
@@ -1192,7 +1192,7 @@ var SseMCPTransport = class {
}
this.connected = true;
resolve();
- } else if (event === "message") {
+ } else if ((event === void 0 || event === "message")) {
try {
const message = await parseJSONRPCMessage(data);
(_a4 = this.onmessage) == null ? void 0 : _a4.call(this, message);
@@ -1471,7 +1471,7 @@ var HttpMCPTransport = class {
const { done, value } = await reader.read();
if (done) return;
const { event, data } = value;
- if (event === "message") {
+ if ((event === void 0 || event === "message")) {
try {
const msg = await parseJSONRPCMessage(data);
(_a4 = this.onmessage) == null ? void 0 : _a4.call(this, msg);
@@ -1598,7 +1598,7 @@ var HttpMCPTransport = class {
if (id) {
this.lastInboundEventId = id;
}
- if (event === "message") {
+ if ((event === void 0 || event === "message")) {
try {
const msg = await parseJSONRPCMessage(data);
(_a4 = this.onmessage) == null ? void 0 : _a4.call(this, msg);
2 changes: 1 addition & 1 deletion plugins/ai-chat-backend/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"dependencies": {
"@ai-sdk/anthropic": "^3.0.76",
"@ai-sdk/azure": "^3.0.64",
"@ai-sdk/mcp": "^1.0.41",
"@ai-sdk/mcp": "patch:@ai-sdk/mcp@npm%3A1.0.46#~/.yarn/patches/@ai-sdk-mcp-npm-1.0.46-b48c61b836.patch",
"@ai-sdk/openai": "^3.0.63",
"@ai-sdk/openai-compatible": "^2.0.47",
"@backstage/backend-defaults": "backstage:^",
Expand Down
202 changes: 202 additions & 0 deletions plugins/ai-chat-backend/src/getMcpTools.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,202 @@
import { ConfigReader } from '@backstage/config';
import { LoggerService } from '@backstage/backend-plugin-api';
import { McpClientCache } from '@giantswarm/backstage-plugin-gs-node';
import { experimental_createMCPClient as createMCPClient } from '@ai-sdk/mcp';
import { getMcpTools } from './getMcpTools';

// Replace the '@ai-sdk/mcp' module with a stub whose only export is a Jest
// mock function, so each test decides what creating an MCP client returns.
jest.mock('@ai-sdk/mcp', () => ({
experimental_createMCPClient: jest.fn(),
}));

// The imported client factory viewed as a jest.Mock, so tests can install
// per-test implementations on it.
const createMCPClientMock = createMCPClient as jest.Mock;

/**
 * Creates a stub logger whose `debug`, `info`, `warn` and `error` methods are
 * Jest mocks and whose `child()` hands back the very same stub.
 *
 * @returns The stub, cast to `LoggerService`.
 */
function mockLogger(): LoggerService {
  const childFactory = jest.fn();
  const stub = {
    debug: jest.fn(),
    info: jest.fn(),
    warn: jest.fn(),
    error: jest.fn(),
    child: childFactory,
  };
  // Child loggers resolve to the parent stub, so nested logging hits the
  // same set of mocks.
  childFactory.mockReturnValue(stub);
  return stub as unknown as LoggerService;
}

/**
 * Creates a fake MCP client that responds immediately.
 *
 * - `listResources()` rejects with "does not support resources".
 * - `tools()` resolves to a single tool named `my_tool`.
 * - `close()` resolves to `undefined`.
 *
 * @returns A fresh object of Jest mocks on every call.
 */
function makeGoodClient() {
  const toolset = {
    my_tool: {
      description: 'A test tool',
      execute: async () => 'ok',
    },
  };

  const listResources = jest
    .fn()
    .mockRejectedValue(new Error('does not support resources'));
  const tools = jest.fn().mockResolvedValue(toolset);
  const close = jest.fn().mockResolvedValue(undefined);

  return { listResources, tools, close };
}

describe('getMcpTools', () => {
  let clientCache: McpClientCache;

  /** Returns a promise that never settles, standing in for a lost response. */
  const neverSettles = () => new Promise(() => {});

  /** Returns a promise for a fake client that answers immediately. */
  const healthyClient = () => Promise.resolve(makeGoodClient());

  /** Wraps a list of MCP server entries in the `aiChat.mcp` config shape. */
  const configFor = (servers: Array<Record<string, string | number>>) =>
    new ConfigReader({ aiChat: { mcp: servers } });

  /**
   * Calls getMcpTools with the arguments shared by every test: the given
   * config, an empty object, `undefined`, a fresh mock logger and the
   * per-test client cache.
   */
  const loadTools = (config: ConfigReader) =>
    getMcpTools(config, {}, undefined, mockLogger(), clientCache);

  beforeEach(() => {
    jest.clearAllMocks();
    clientCache = new McpClientCache(mockLogger());
  });

  afterEach(async () => {
    // Fire-and-forget: the hanging-server tests deliberately leave client
    // promises that never settle, and awaiting dispose could block on them.
    void clientCache.dispose().catch(() => {});
  });

  it('loads tools from a responsive MCP server', async () => {
    createMCPClientMock.mockImplementation(healthyClient);

    const { connectedServers, failedServers, tools } = await loadTools(
      configFor([{ name: 'good', url: 'http://good.example.com/mcp' }]),
    );

    expect(connectedServers).toEqual(['good']);
    expect(failedServers).toEqual([]);
    expect(Object.keys(tools)).toEqual(['my_tool']);
  });

  it('does not hang the chat request when an MCP server never completes the connection handshake', async () => {
    // The 'hanging' server models an initialize response that is never
    // delivered to the client (seen in production with muster behind
    // agentgateway): its client promise stays pending forever.
    createMCPClientMock.mockImplementation((options: { name: string }) =>
      options.name === 'hanging' ? neverSettles() : healthyClient(),
    );

    const { connectedServers, failedServers, tools } = await loadTools(
      configFor([
        { name: 'good', url: 'http://good.example.com/mcp' },
        {
          name: 'hanging',
          url: 'http://hanging.example.com/mcp',
          timeoutMs: 250,
        },
      ]),
    );

    expect(connectedServers).toEqual(['good']);
    expect(failedServers).toHaveLength(1);
    const [failure] = failedServers;
    expect(failure.name).toBe('hanging');
    expect(failure.error).toMatch(/timed out/i);
    // The healthy server's tools must still be offered.
    expect(Object.keys(tools)).toEqual(['my_tool']);
  }, 5000);

  it('does not hang the chat request when tools/list never returns', async () => {
    createMCPClientMock.mockImplementation((options: { name: string }) => {
      if (options.name !== 'hanging-tools') {
        return healthyClient();
      }
      // The connection succeeds, but the tools/list response never arrives.
      const stalledClient = {
        listResources: jest
          .fn()
          .mockRejectedValue(new Error('does not support resources')),
        tools: jest.fn().mockReturnValue(neverSettles()),
        close: jest.fn().mockResolvedValue(undefined),
      };
      return Promise.resolve(stalledClient);
    });

    const { connectedServers, failedServers, tools } = await loadTools(
      configFor([
        {
          name: 'hanging-tools',
          url: 'http://hanging.example.com/mcp',
          timeoutMs: 250,
        },
        { name: 'good', url: 'http://good.example.com/mcp' },
      ]),
    );

    expect(connectedServers).toEqual(['good']);
    expect(failedServers).toHaveLength(1);
    const [failure] = failedServers;
    expect(failure.name).toBe('hanging-tools');
    expect(failure.error).toMatch(/timed out/i);
    expect(Object.keys(tools)).toEqual(['my_tool']);
  }, 5000);

  it('evicts a timed-out server from the cache so the next request retries', async () => {
    // Only the very first client creation hangs; every later one succeeds.
    createMCPClientMock
      .mockImplementationOnce(neverSettles)
      .mockImplementation(healthyClient);

    const config = configFor([
      {
        name: 'flaky',
        url: 'http://flaky.example.com/mcp',
        timeoutMs: 250,
      },
    ]);

    const first = await loadTools(config);
    expect(first.failedServers).toHaveLength(1);

    const second = await loadTools(config);
    expect(second.connectedServers).toEqual(['flaky']);
    expect(second.failedServers).toEqual([]);
  }, 5000);
});
Loading
Loading