Skip to content

Commit f432d8d

Browse files
authored
chat: Fix streaming parser for granite models (#15682)
* fix(chat): fix streaming parser for granite models
* tests: add test cases for Granite models chat parser
1 parent 4067f07 commit f432d8d

File tree

2 files changed

+114
-13
lines changed

2 files changed

+114
-13
lines changed

common/chat.cpp

Lines changed: 23 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -2232,15 +2232,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
22322232

22332233
static void common_chat_parse_granite(common_chat_msg_parser & builder) {
22342234
// Parse thinking tags
2235+
static const common_regex start_think_regex(regex_escape("<think>"));
2236+
static const common_regex end_think_regex(regex_escape("</think>"));
2237+
// Granite models output partial tokens such as "<" and "<think".
2238+
// By leveraging try_consume_regex()/try_find_regex() throwing
2239+
// common_chat_msg_partial_exception for these partial tokens,
2240+
// processing is interrupted and the tokens are not passed to add_content().
2241+
if (auto res = builder.try_consume_regex(start_think_regex)) {
2242+
// Restore position for try_parse_reasoning()
2243+
builder.move_to(res->groups[0].begin);
2244+
builder.try_find_regex(end_think_regex, std::string::npos, false);
2245+
// Restore position for try_parse_reasoning()
2246+
builder.move_to(res->groups[0].begin);
2247+
}
22352248
builder.try_parse_reasoning("<think>", "</think>");
22362249

2237-
// Parse response tags using regex
2238-
static const common_regex response_regex("<response>([\\s\\S]*?)</response>");
2239-
if (auto res = builder.try_find_regex(response_regex)) {
2240-
// Extract the content between the tags (capture group 1)
2241-
auto content = builder.str(res->groups[1]);
2242-
builder.add_content(content);
2243-
builder.move_to(res->groups[0].end);
2250+
// Parse response tags
2251+
static const common_regex start_response_regex(regex_escape("<response>"));
2252+
static const common_regex end_response_regex(regex_escape("</response>"));
2253+
// Granite models output partial tokens such as "<" and "<response".
2254+
// Same hack as reasoning parsing.
2255+
if (builder.try_consume_regex(start_response_regex)) {
2256+
builder.try_find_regex(end_response_regex);
22442257
}
22452258

22462259
if (!builder.syntax().parse_tool_calls) {
@@ -2254,13 +2267,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
22542267
builder.move_to(res->groups[0].end);
22552268

22562269
// Expect JSON array of tool calls
2257-
auto tool_calls_data = builder.consume_json();
2258-
if (tool_calls_data.json.is_array()) {
2259-
if (!builder.add_tool_calls(tool_calls_data.json)) {
2260-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
2270+
if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
2271+
if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
2272+
throw common_chat_msg_partial_exception("incomplete tool call");
22612273
}
2262-
} else {
2263-
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
22642274
}
22652275
} else {
22662276
builder.add_content(builder.consume_rest());

tests/test-chat.cpp

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1402,6 +1402,12 @@ static void test_template_output_parsers() {
14021402
"Hello, world!\nWhat's up?",
14031403
/* is_partial= */ false,
14041404
{COMMON_CHAT_FORMAT_GRANITE}));
1405+
assert_msg_equals(
1406+
message_assist,
1407+
common_chat_parse(
1408+
"Hello, world!\nWhat's up?",
1409+
/* is_partial= */ true,
1410+
{COMMON_CHAT_FORMAT_GRANITE}));
14051411

14061412
// Test parsing content with thinking
14071413
assert_msg_equals(message_assist_thoughts,
@@ -1412,13 +1418,98 @@ static void test_template_output_parsers() {
14121418
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
14131419
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
14141420
}));
1421+
assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
1422+
common_chat_parse(
1423+
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
1424+
/* is_partial= */ false,
1425+
{COMMON_CHAT_FORMAT_GRANITE}));
1426+
assert_msg_equals(message_assist_thoughts,
1427+
common_chat_parse(
1428+
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
1429+
/* is_partial= */ true,
1430+
{
1431+
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1432+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1433+
}));
1434+
assert_msg_equals(message_assist_thoughts,
1435+
common_chat_parse(
1436+
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
1437+
/* is_partial= */ false,
1438+
{
1439+
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1440+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1441+
}));
1442+
assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
1443+
common_chat_parse(
1444+
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
1445+
/* is_partial= */ false,
1446+
{COMMON_CHAT_FORMAT_GRANITE}));
1447+
assert_msg_equals(message_assist_empty,
1448+
common_chat_parse(
1449+
"<think",
1450+
/* is_partial= */ true,
1451+
{
1452+
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1453+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1454+
}));
1455+
assert_msg_equals(message_assist_empty,
1456+
common_chat_parse(
1457+
"<think",
1458+
/* is_partial= */ true,
1459+
{COMMON_CHAT_FORMAT_GRANITE}));
1460+
assert_msg_equals(message_assist_thoughts_no_content,
1461+
common_chat_parse(
1462+
"<think>I'm\nthinking",
1463+
/* is_partial= */ true,
1464+
{
1465+
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1466+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1467+
}));
1468+
assert_msg_equals(
1469+
message_assist_empty,
1470+
common_chat_parse(
1471+
"<think>I'm\nthinking</think><response",
1472+
/* is_partial= */ true,
1473+
{COMMON_CHAT_FORMAT_GRANITE}));
14151474

14161475
// Test parsing tool calls
14171476
assert_msg_equals(message_assist_call,
14181477
common_chat_parse(
14191478
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
14201479
/* is_partial= */ false,
14211480
{COMMON_CHAT_FORMAT_GRANITE}));
1481+
assert_msg_equals(
1482+
message_assist_call_empty_args,
1483+
common_chat_parse(
1484+
"<|tool_call|>[{\"name\": \"special_function\"",
1485+
/* is_partial= */ true,
1486+
{COMMON_CHAT_FORMAT_GRANITE}));
1487+
assert_msg_equals(
1488+
message_assist_call_cutoff_args,
1489+
common_chat_parse(
1490+
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
1491+
/* is_partial= */ true,
1492+
{COMMON_CHAT_FORMAT_GRANITE}));
1493+
assert_msg_equals(
1494+
message_assist_call_cutoff_args,
1495+
common_chat_parse(
1496+
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
1497+
/* is_partial= */ true,
1498+
{
1499+
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1500+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1501+
}));
1502+
1503+
// Test parsing tool calls with thinking
1504+
assert_msg_equals(
1505+
message_assist_call_thoughts,
1506+
common_chat_parse(
1507+
"<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
1508+
/* is_partial= */ true,
1509+
{
1510+
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
1511+
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
1512+
}));
14221513

14231514
// Test template generation for regular content
14241515
test_templates(tmpls.get(), end_tokens, message_assist, tools,

0 commit comments

Comments
 (0)