diff --git a/common/templating.py b/common/templating.py index cc0cceb1..edb1741d 100644 --- a/common/templating.py +++ b/common/templating.py @@ -30,6 +30,9 @@ class TemplateMetadata: stop_strings: List[str] = field(default_factory=list) tool_start: Optional[str] = None + tool_end: Optional[str] = None + tool_call_format: str = "json" # "json" or "xml" + xml_processor_type: Optional[str] = None # "glm45", "custom", etc. class PromptTemplate: @@ -76,6 +79,20 @@ async def extract_metadata(self, template_vars: dict): if isinstance(template_module.tool_start, str): template_metadata.tool_start = template_module.tool_start + if hasattr(template_module, "tool_end"): + if isinstance(template_module.tool_end, str): + template_metadata.tool_end = template_module.tool_end + + if hasattr(template_module, "tool_call_format"): + if isinstance(template_module.tool_call_format, str): + template_metadata.tool_call_format = template_module.tool_call_format + + if hasattr(template_module, "xml_processor_type"): + if isinstance(template_module.xml_processor_type, str): + template_metadata.xml_processor_type = ( + template_module.xml_processor_type + ) + self.metadata = template_metadata return template_metadata diff --git a/docs/XML-Tool-Calling-Implementation.md b/docs/XML-Tool-Calling-Implementation.md new file mode 100644 index 00000000..89531ecb --- /dev/null +++ b/docs/XML-Tool-Calling-Implementation.md @@ -0,0 +1,311 @@ +# XML Tool Calling Implementation for TabbyAPI + +This document describes the XML-based tool calling support implemented for GLM-4.5 and Qwen3-coder models in TabbyAPI. + +## Overview + +Some models (GLM-4.5, Qwen3-coder) generate tool calls in XML format, which differs from the OpenAI JSON format that TabbyAPI expects. This implementation provides a generic XML tool call processor that converts various XML tool call formats to OpenAI-compatible JSON format. + +## Architecture + +### Components + +1. 
**BaseXMLToolCallProcessor** (`endpoints/OAI/utils/xml_tool_processors.py`) + - Abstract base class for XML tool call processors + - Provides common functionality for parsing and converting tool calls + - Extensible design allows support for other XML-based models + +2. **GLM45ToolCallProcessor** (`endpoints/OAI/utils/xml_tool_processors.py`) + - Concrete implementation for GLM-4.5 specific XML format + - Handles the `<tool_call>` and `<arg_key>`/`<arg_value>` structure + - Converts XML to OpenAI JSON format + +3. **Qwen3CoderToolCallProcessor** (`endpoints/OAI/utils/xml_tool_processors.py`) + - Concrete implementation for Qwen3-coder specific XML format + - Handles nested `<function=name><parameter=name>value</parameter></function>` structure + - Supports multi-line parameter values + - Converts XML to OpenAI JSON format + +4. **XMLToolCallProcessorFactory** (`endpoints/OAI/utils/xml_tool_processors.py`) + - Factory class for creating appropriate XML processors + - Supports GLM-4.5 ("glm45", "glm-4.5", "glm4") and Qwen3-coder ("qwen3-coder", "qwen3") processors + - Supports extensibility by allowing registration of new processor types + +5. **Enhanced TemplateMetadata** (`common/templating.py`) + - Extended to support XML tool call configuration + - New fields: `tool_call_format`, `xml_processor_type`, `tool_end` + +6. 
**Enhanced ToolCallProcessor** (`endpoints/OAI/utils/tools.py`) + - Added `from_text()` method that routes to appropriate processor + - Added `from_xml()` method for XML-specific processing + - Maintains backward compatibility with JSON processing + +### Supported XML Formats + +#### GLM-4.5 XML Format + +The GLM-4.5 model generates tool calls in this format: + +```xml +<tool_call>function_name +<arg_key>parameter1</arg_key> +<arg_value>value1</arg_value> +<arg_key>parameter2</arg_key> +<arg_value>value2</arg_value> +</tool_call> +``` + +#### Qwen3-coder XML Format + +The Qwen3-coder model generates tool calls in this nested format: + +```xml +<tool_call> +<function=function_name> +<parameter=parameter1> +value1 +</parameter> +<parameter=parameter2> +This is a multi-line +parameter value that spans +multiple lines +</parameter> +</function> +</tool_call> +``` + +Both formats get converted to OpenAI JSON format: + +```json +{ + "id": "call_12345", + "type": "function", + "function": { + "name": "function_name", + "arguments": "{\"parameter1\": \"value1\", \"parameter2\": \"value2\"}" + } +} +``` + +## Usage + +### Template Configuration + +#### GLM-4.5 Template + +The GLM-4.5 template (`templates/tool_calls/glm-4p5-chat-template-tabbyapi.jinja`) includes: + +```jinja +{# Metadata #} +{%- set stop_strings = ["<|user|>", "<|assistant|>", "<|observation|>", "<|system|>"] -%} +{%- set tool_start = "<tool_call>" -%} +{%- set tool_end = "</tool_call>" -%} +{%- set tool_call_format = "xml" -%} +{%- set xml_processor_type = "glm45" -%} +``` + +#### Qwen3-coder Template + +The Qwen3-coder template (`templates/tool_calls/qwen3-coder-tabbyapi.jinja`) includes: + +```jinja +{# XML Tool Call Processing Configuration #} +{%- set tool_call_format = "xml" -%} +{%- set xml_processor_type = "qwen3-coder" -%} +``` + +### Loading Models + +#### GLM-4.5 Models + +When loading a GLM-4.5 model, specify the tool-calling template: + +```yaml +# config.yml +model: + model_name: "path/to/glm-4.5-model" + prompt_template: "tool_calls/glm-4p5-chat-template-tabbyapi" +``` + +#### Qwen3-coder Models + +When loading a Qwen3-coder model, specify the tool-calling template: + +```yaml +# config.yml +model: + model_name: "path/to/qwen3-coder-model" + 
prompt_template: "tool_calls/qwen3-coder-tabbyapi" +``` + +Or via API: + +```bash +# GLM-4.5 +curl -X POST "http://localhost:5000/v1/model/load" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "path/to/glm-4.5-model", + "prompt_template": "tool_calls/glm-4p5-chat-template-tabbyapi" + }' + +# Qwen3-coder +curl -X POST "http://localhost:5000/v1/model/load" \ + -H "Content-Type: application/json" \ + -d '{ + "name": "path/to/qwen3-coder-model", + "prompt_template": "tool_calls/qwen3-coder-tabbyapi" + }' +``` + +### Tool Call Request + +Standard OpenAI-compatible tool calling request: + +```json +{ + "model": "glm-4.5", + "messages": [ + { + "role": "user", + "content": "What's the weather in Beijing?" + } + ], + "tools": [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get weather information", + "parameters": { + "type": "object", + "properties": { + "city": { + "type": "string", + "description": "City name" + }, + "date": { + "type": "string", + "description": "Date in YYYY-MM-DD format" + } + }, + "required": ["city"] + } + } + } + ] +} +``` + +## Integration Flow + +1. **Template Processing**: The template metadata indicates XML format tool calls +2. **Model Generation**: GLM-4.5 generates XML tool calls when `` trigger is detected +3. **XML Parsing**: `GLM45ToolCallProcessor` parses the XML structure +4. **JSON Conversion**: XML is converted to OpenAI-compatible JSON format +5. **Standard Pipeline**: Converted tool calls flow through normal TabbyAPI processing + +## Extensibility + +### Adding New XML Processors + +To support other XML-based models: + +1. Create a new processor class extending `BaseXMLToolCallProcessor` +2. Implement the required methods for the specific XML format +3. 
Register the processor with the factory: + +```python +# Custom processor +class CustomXMLProcessor(BaseXMLToolCallProcessor): + def has_tool_call(self, text: str) -> bool: + return "" in text + + def parse_xml_to_json(self, text: str, tools: List[Tool]) -> List[ToolCall]: + # Custom parsing logic + pass + +# Register processor +XMLToolCallProcessorFactory.register_processor("custom", CustomXMLProcessor) +``` + +### Template Configuration + +Create a template with appropriate metadata: + +```jinja +{%- set tool_call_format = "xml" -%} +{%- set xml_processor_type = "custom" -%} +{%- set tool_start = "" -%} +{%- set tool_end = "" -%} +``` + +## Testing + +Unit tests are provided in `tests/test_xml_tool_calls.py` covering: + +- XML parsing functionality +- Multiple tool call handling +- JSON conversion accuracy +- Error handling for malformed XML +- Factory pattern functionality +- Argument type processing + +Run tests with: + +```bash +python -m pytest tests/test_xml_tool_calls.py -v +``` + +## Error Handling + +The implementation includes robust error handling: + +- **Malformed XML**: Returns empty tool call list, logs error +- **Unknown Functions**: Still processes but without type validation +- **Parsing Failures**: Falls back gracefully, maintains system stability +- **Missing Dependencies**: Graceful degradation to JSON processing + +## Performance Considerations + +- **Regex-based Parsing**: Efficient for typical tool call volumes +- **Lazy Evaluation**: Processors created only when needed +- **Memory Efficient**: Processes tool calls incrementally +- **Caching**: Template metadata cached after first extraction + +## Compatibility + +- **Backward Compatible**: Existing JSON tool calling continues to work +- **OpenAI Standard**: Output format matches OpenAI API specification +- **Streaming Support**: Works with both streaming and non-streaming responses +- **Multi-tool**: Supports multiple tool calls in single response + +## Troubleshooting + +### Common Issues 
+ +1. **Tool calls not detected** + - Verify template has `tool_call_format = "xml"` + - Check `tool_start` matches model output + - Ensure `xml_processor_type` is correct + +2. **Parsing errors** + - Validate XML format matches expected structure + - Check for missing closing tags + - Verify argument key/value pairing + +3. **JSON conversion failures** + - Check argument types in tool definitions + - Validate JSON-formatted argument values + - Review error logs for specific parsing issues + +### Debug Mode + +Enable detailed logging for troubleshooting: + +```python +import logging +logging.getLogger("endpoints.OAI.utils.xml_tool_processors").setLevel(logging.DEBUG) +``` + +This implementation provides a robust, extensible foundation for XML-based tool calling in TabbyAPI while maintaining full compatibility with existing JSON-based tool calling functionality. \ No newline at end of file diff --git a/endpoints/OAI/utils/chat_completion.py b/endpoints/OAI/utils/chat_completion.py index 4a6c2106..aec29876 100644 --- a/endpoints/OAI/utils/chat_completion.py +++ b/endpoints/OAI/utils/chat_completion.py @@ -27,13 +27,17 @@ ChatCompletionResponse, ChatCompletionStreamChoice, ) +from endpoints.OAI.types.tools import ToolSpec from endpoints.OAI.types.common import UsageStats from endpoints.OAI.utils.completion import _parse_gen_request_id, _stream_collector from endpoints.OAI.utils.tools import ToolCallProcessor, TOOL_CALL_SCHEMA def _create_response( - request_id: str, generations: List[dict], model_name: Optional[str] + request_id: str, + generations: List[dict], + model_name: Optional[str], + tools: Optional[List[ToolSpec]] = None, ): """Create a chat completion response from the provided text.""" @@ -144,9 +148,21 @@ def _create_stream_chunk( # Mark finish_reason as tool_calls since this is the last chunk if "tool_calls" in generation: tool_calls = generation["tool_calls"] - message = ChatCompletionMessage( - tool_calls=ToolCallProcessor.from_json(tool_calls) - ) + # 
Get template metadata for tool call processing + template_metadata = model.container.prompt_template.metadata + if template_metadata and template_metadata.tool_call_format == "xml": + # Use XML processor for XML-based tool calls + processed_tool_calls = ToolCallProcessor.from_text( + tool_calls, + [], # We don't have tools context in streaming + tool_call_format="xml", + xml_processor_type=template_metadata.xml_processor_type, + ) + else: + # Default to JSON processor + processed_tool_calls = ToolCallProcessor.from_json(tool_calls) + + message = ChatCompletionMessage(tool_calls=processed_tool_calls) choice.delta = message choice.finish_reason = "tool_calls" @@ -445,7 +461,9 @@ async def generate_chat_completion( prompt, embeddings, data, generations, request ) - response = _create_response(request.state.id, generations, model_path.name) + response = _create_response( + request.state.id, generations, model_path.name, data.tools + ) logger.info(f"Finished chat completion request {request.state.id}") diff --git a/endpoints/OAI/utils/tools.py b/endpoints/OAI/utils/tools.py index c1ebdedf..d3540b11 100644 --- a/endpoints/OAI/utils/tools.py +++ b/endpoints/OAI/utils/tools.py @@ -1,8 +1,9 @@ import json from loguru import logger -from typing import List +from typing import List, Optional -from endpoints.OAI.types.tools import ToolCall +from endpoints.OAI.types.tools import ToolCall, Tool +from endpoints.OAI.utils.xml_tool_processors import XMLToolCallProcessorFactory TOOL_CALL_SCHEMA = { @@ -41,6 +42,50 @@ def from_json(tool_calls_str: str) -> List[ToolCall]: return [ToolCall(**tool_call) for tool_call in tool_calls] + @staticmethod + def from_xml( + tool_calls_text: str, tools: List[Tool], xml_processor_type: str = "glm45" + ) -> List[ToolCall]: + """Process XML tool calls and convert to ToolCall objects""" + try: + processor = XMLToolCallProcessorFactory.create_processor(xml_processor_type) + return processor.parse_xml_to_json(tool_calls_text, tools) + except Exception 
"""XML tool call processors for converting XML-based tool calls to OpenAI format."""

import ast
import json
import logging
import re
import uuid
from abc import ABC, abstractmethod
from collections.abc import Mapping
from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple

if TYPE_CHECKING:
    # Annotation-only import; the runtime import is deferred to
    # ``_create_tool_call`` so the parsing logic has no hard dependency on
    # the API type layer.
    from endpoints.OAI.types.tools import ToolCall, ToolSpec

logger = logging.getLogger(__name__)


class BaseXMLToolCallProcessor(ABC):
    """Base class for XML-based tool call processors.

    Subclasses set the start/end markers and implement ``has_tool_call`` and
    ``parse_xml_to_json``. The helpers here cover the parts every XML dialect
    shares: scanning call blocks with regexes, coercing argument values to the
    type declared by the tool schema, and building ``ToolCall`` objects.
    """

    def __init__(self):
        self.tool_start_pattern: str = ""
        self.tool_end_pattern: str = ""

    @abstractmethod
    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains XML format tool calls."""

    @abstractmethod
    def parse_xml_to_json(
        self, text: str, tools: "List[ToolSpec]"
    ) -> "List[ToolCall]":
        """Parse XML tool calls from text and convert to OpenAI JSON format."""

    def _parse_arguments(self, json_value: str) -> Tuple[Any, bool]:
        """Parse an argument value, trying JSON first, then a Python literal.

        Returns:
            ``(parsed_value, True)`` on success, or ``(json_value, False)``
            with the input untouched when it is neither valid JSON nor a
            JSON-serializable Python literal.
        """
        try:
            return json.loads(json_value), True
        except (TypeError, ValueError, RecursionError):
            # Not JSON (JSONDecodeError is a ValueError); try a Python literal.
            pass

        try:
            parsed_value = ast.literal_eval(json_value)
            # Literals such as sets or bytes cannot be serialized later by
            # ``json.dumps``; treat them as unparsed text instead of letting
            # one odd value discard every tool call in the response.
            json.dumps(parsed_value)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            return json_value, False
        return parsed_value, True

    def _get_argument_type(
        self, func_name: str, arg_key: str, tools: "Optional[List[ToolSpec]]"
    ) -> Optional[str]:
        """Get the expected type of an argument based on tool definition.

        Returns ``None`` when the function, its parameter schema, or the
        argument is unknown, so callers fall back to best-effort parsing.
        """
        name_to_tool = {tool.function.name: tool for tool in tools or []}
        tool = name_to_tool.get(func_name)
        if tool is None:
            return None

        # A tool may legitimately declare no parameters or no "properties".
        parameters = getattr(tool.function, "parameters", None)
        if not isinstance(parameters, Mapping):
            return None
        properties = parameters.get("properties")
        if not isinstance(properties, Mapping):
            return None
        arg_schema = properties.get(arg_key)
        if not isinstance(arg_schema, Mapping):
            return None
        return arg_schema.get("type", None)

    @staticmethod
    def _new_call_id() -> str:
        """Return a fresh, unique tool call id.

        ``hash()`` on strings is salted per process and may be negative, and
        hashing name+arguments gives identical ids to identical calls, so a
        random UUID is used instead.
        """
        return f"call_{uuid.uuid4().hex[:24]}"

    def _create_tool_call(
        self, name: str, arguments: Dict[str, Any], call_id: Optional[str] = None
    ) -> "ToolCall":
        """Create a ToolCall object from parsed data.

        Args:
            name: Function name requested by the model.
            arguments: Parsed arguments; serialized to a JSON string.
            call_id: Optional explicit id; a unique one is generated if falsy.
        """
        from endpoints.OAI.types.tools import Tool, ToolCall

        return ToolCall(
            id=call_id or self._new_call_id(),
            type="function",
            function=Tool(name=name, arguments=json.dumps(arguments)),
        )

    def _scan_calls(
        self,
        text: str,
        tools: "Optional[List[ToolSpec]]",
        call_regex: str,
        detail_regex: str,
        arg_regex: str,
    ) -> List[Tuple[str, Dict[str, Any]]]:
        """Extract ``(function_name, arguments)`` pairs from ``text``.

        Args:
            text: Raw model output.
            tools: Tool definitions used to look up declared argument types.
            call_regex: Matches one complete tool call block.
            detail_regex: Within a block, group 1 is the function name and
                group 2 the argument section.
            arg_regex: Within the argument section, group 1 is the argument
                name and group 2 its raw value.
        """
        calls: List[Tuple[str, Dict[str, Any]]] = []
        for block in re.findall(call_regex, text, re.DOTALL):
            detail = re.search(detail_regex, block, re.DOTALL)
            if not detail:
                logger.warning("Could not parse tool call: %s", block)
                continue

            func_name = detail.group(1).strip()
            args_section = detail.group(2).strip()

            arguments: Dict[str, Any] = {}
            for raw_key, raw_value in re.findall(arg_regex, args_section, re.DOTALL):
                key = raw_key.strip()
                value = raw_value.strip()
                # Declared strings are kept verbatim ("007" must not become 7);
                # everything else is parsed on a best-effort basis.
                if self._get_argument_type(func_name, key, tools) != "string":
                    value, _ = self._parse_arguments(value)
                arguments[key] = value

            calls.append((func_name, arguments))
        return calls


class GLM45ToolCallProcessor(BaseXMLToolCallProcessor):
    """
    Tool call processor for GLM-4.5 models.

    Handles XML format like:
    <tool_call>function_name
    <arg_key>parameter1</arg_key>
    <arg_value>value1</arg_value>
    <arg_key>parameter2</arg_key>
    <arg_value>value2</arg_value>
    </tool_call>
    """

    def __init__(self):
        super().__init__()
        self.tool_start_pattern = "<tool_call>"
        self.tool_end_pattern = "</tool_call>"
        self.func_call_regex = r"<tool_call>.*?</tool_call>"
        self.func_detail_regex = r"<tool_call>([^\n]*)\n(.*)</tool_call>"
        self.func_arg_regex = (
            r"<arg_key>(.*?)</arg_key>\s*<arg_value>(.*?)</arg_value>"
        )

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains GLM-4.5 format tool calls."""
        return bool(text) and self.tool_start_pattern in text

    def parse_xml_to_json(
        self, text: str, tools: "List[ToolSpec]"
    ) -> "List[ToolCall]":
        """Parse GLM-4.5 XML tool calls and convert to OpenAI JSON format.

        Returns an empty list when no call is present or parsing fails;
        failures are logged rather than raised so generation keeps flowing.
        """
        if not self.has_tool_call(text):
            return []

        try:
            parsed = self._scan_calls(
                text,
                tools,
                self.func_call_regex,
                self.func_detail_regex,
                self.func_arg_regex,
            )
            return [self._create_tool_call(name, args) for name, args in parsed]
        except Exception as e:
            logger.exception("Error parsing GLM-4.5 XML tool calls: %s", e)
            return []


class Qwen3CoderToolCallProcessor(BaseXMLToolCallProcessor):
    """
    Tool call processor for Qwen3-coder models.

    Handles XML format like:
    <tool_call>
    <function=function_name>
    <parameter=param_1>
    value_1
    </parameter>
    <parameter=param_2>
    This is the value for the second parameter
    that can span
    multiple lines
    </parameter>
    </function>
    </tool_call>
    """

    def __init__(self):
        super().__init__()
        self.tool_start_pattern = "<tool_call>"
        self.tool_end_pattern = "</tool_call>"
        self.func_call_regex = r"<tool_call>.*?</tool_call>"
        self.func_detail_regex = r"<function=([^>]+)>(.*?)</function>"
        self.param_regex = r"<parameter=([^>]+)>(.*?)</parameter>"

    def has_tool_call(self, text: str) -> bool:
        """Check if the text contains Qwen3-coder format tool calls."""
        return bool(text) and self.tool_start_pattern in text

    def parse_xml_to_json(
        self, text: str, tools: "List[ToolSpec]"
    ) -> "List[ToolCall]":
        """Parse Qwen3-coder XML tool calls and convert to OpenAI JSON format.

        Returns an empty list when no call is present or parsing fails;
        failures are logged rather than raised so generation keeps flowing.
        """
        if not self.has_tool_call(text):
            return []

        try:
            parsed = self._scan_calls(
                text,
                tools,
                self.func_call_regex,
                self.func_detail_regex,
                self.param_regex,
            )
            return [self._create_tool_call(name, args) for name, args in parsed]
        except Exception as e:
            logger.exception("Error parsing Qwen3-coder XML tool calls: %s", e)
            return []


class XMLToolCallProcessorFactory:
    """Factory for creating appropriate XML tool call processors."""

    # Canonical processor names (what ``get_available_processors`` reports).
    _processors = {
        "glm45": GLM45ToolCallProcessor,
        "qwen3-coder": Qwen3CoderToolCallProcessor,
    }

    # Alternate spellings accepted by ``create_processor``.
    _aliases = {
        "glm-4.5": "glm45",
        "glm4": "glm45",
        "qwen3": "qwen3-coder",
    }

    @classmethod
    def create_processor(cls, processor_type: str) -> BaseXMLToolCallProcessor:
        """Create an XML tool call processor of the specified type.

        Lookup is case-insensitive. Explicitly registered names win over the
        built-in aliases.

        Raises:
            ValueError: If ``processor_type`` is not a known name or alias.
        """
        key = processor_type.strip().lower() if isinstance(processor_type, str) else ""
        processor_class = cls._processors.get(key)
        if processor_class is None:
            processor_class = cls._processors.get(cls._aliases.get(key, ""))
        if not processor_class:
            raise ValueError(f"Unknown XML tool call processor type: {processor_type}")
        return processor_class()

    @classmethod
    def register_processor(cls, name: str, processor_class: type):
        """Register a new XML tool call processor type."""
        cls._processors[name.lower()] = processor_class

    @classmethod
    def get_available_processors(cls) -> List[str]:
        """Get list of available processor types."""
        return list(cls._processors.keys())
type="function", + function=Function( + name="calculate_sum", + description="Calculate the sum of numbers", + parameters={ + "type": "object", + "properties": { + "numbers": { + "type": "array", + "description": "List of numbers", + }, + "precision": { + "type": "integer", + "description": "Decimal precision", + }, + }, + }, + ), + ), + ] + + def test_has_tool_call_positive(self): + """Test detection of XML tool calls.""" + text_with_tool = """Some text before +get_weather +city +Beijing + +Some text after""" + + assert self.processor.has_tool_call(text_with_tool) is True + + def test_has_tool_call_negative(self): + """Test when no tool calls are present.""" + text_without_tool = "This is just regular text with no tool calls." + + assert self.processor.has_tool_call(text_without_tool) is False + + def test_parse_single_tool_call(self): + """Test parsing a single XML tool call.""" + xml_text = """get_weather +city +Beijing +date +2024-06-27 +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 1 + assert isinstance(result[0], ToolCall) + assert result[0].function.name == "get_weather" + + arguments = json.loads(result[0].function.arguments) + assert arguments["city"] == "Beijing" + assert arguments["date"] == "2024-06-27" + + def test_parse_multiple_tool_calls(self): + """Test parsing multiple XML tool calls.""" + xml_text = """get_weather +city +Beijing +date +2024-06-27 + + +calculate_sum +numbers +[1, 2, 3, 4, 5] +precision +2 +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 2 + + # First tool call + assert result[0].function.name == "get_weather" + args1 = json.loads(result[0].function.arguments) + assert args1["city"] == "Beijing" + assert args1["date"] == "2024-06-27" + + # Second tool call + assert result[1].function.name == "calculate_sum" + args2 = json.loads(result[1].function.arguments) + assert args2["numbers"] == [1, 2, 3, 4, 5] + assert 
args2["precision"] == 2 + + def test_parse_with_json_values(self): + """Test parsing XML tool calls with JSON-formatted argument values.""" + xml_text = """calculate_sum +numbers +[10, 20, 30] +precision +3 +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 1 + arguments = json.loads(result[0].function.arguments) + assert arguments["numbers"] == [10, 20, 30] + assert arguments["precision"] == 3 + + def test_parse_with_surrounding_text(self): + """Test parsing XML tool calls with surrounding text.""" + xml_text = """I need to check the weather and do some calculations. + +get_weather +city +Shanghai +units +metric + + +Let me also calculate something: + +calculate_sum +numbers +[5, 10, 15] + + +That should do it.""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 2 + assert result[0].function.name == "get_weather" + assert result[1].function.name == "calculate_sum" + + def test_parse_malformed_xml(self): + """Test handling of malformed XML.""" + malformed_xml = """get_weather +city +Beijing +""" # Missing closing tag for arg_value + + result = self.processor.parse_xml_to_json(malformed_xml, self.sample_tools) + + # Should create tool call but with empty arguments due to malformed arg_value + assert len(result) == 1 + assert result[0].function.name == "get_weather" + arguments = json.loads(result[0].function.arguments) + assert arguments == {} # Empty arguments due to malformed XML + + def test_empty_input(self): + """Test parsing empty input.""" + result = self.processor.parse_xml_to_json("", self.sample_tools) + assert len(result) == 0 + + def test_no_matching_tools(self): + """Test parsing with no matching tools in the tool list.""" + xml_text = """unknown_function +param +value +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + # Should still parse but with no type validation + assert len(result) == 1 + assert 
result[0].function.name == "unknown_function" + + +class TestXMLToolCallProcessorFactory: + """Test XML tool call processor factory.""" + + def test_create_glm45_processor(self): + """Test creating GLM-4.5 processor.""" + processor = XMLToolCallProcessorFactory.create_processor("glm45") + assert isinstance(processor, GLM45ToolCallProcessor) + + def test_create_glm45_processor_variations(self): + """Test creating GLM-4.5 processor with different name variations.""" + for name in ["glm45", "GLM45"]: + processor = XMLToolCallProcessorFactory.create_processor(name) + assert isinstance(processor, GLM45ToolCallProcessor) + + def test_create_unknown_processor(self): + """Test error handling for unknown processor type.""" + with pytest.raises(ValueError, match="Unknown XML tool call processor type"): + XMLToolCallProcessorFactory.create_processor("unknown_processor") + + def test_create_qwen3_coder_processor(self): + """Test creating Qwen3-coder processor.""" + processor = XMLToolCallProcessorFactory.create_processor("qwen3-coder") + assert isinstance(processor, Qwen3CoderToolCallProcessor) + + def test_create_qwen3_coder_processor_variations(self): + """Test creating Qwen3-coder processor with different name variations.""" + for name in ["qwen3-coder", "QWEN3-CODER"]: + processor = XMLToolCallProcessorFactory.create_processor(name) + assert isinstance(processor, Qwen3CoderToolCallProcessor) + + def test_get_available_processors(self): + """Test getting list of available processors.""" + processors = XMLToolCallProcessorFactory.get_available_processors() + assert "glm45" in processors + assert "qwen3-coder" in processors + + +class TestQwen3CoderToolCallProcessor: + """Test Qwen3-coder XML tool call processor.""" + + def setup_method(self): + """Set up test fixtures.""" + self.processor = Qwen3CoderToolCallProcessor() + self.sample_tools = [ + ToolSpec( + type="function", + function=Function( + name="get_weather", + description="Get weather information for a city", + 
parameters={ + "type": "object", + "properties": { + "city": {"type": "string", "description": "City name"}, + "date": { + "type": "string", + "description": "Date in YYYY-MM-DD format", + }, + "units": { + "type": "string", + "description": "Temperature units", + }, + }, + }, + ), + ), + ToolSpec( + type="function", + function=Function( + name="calculate_sum", + description="Calculate the sum of numbers", + parameters={ + "type": "object", + "properties": { + "numbers": { + "type": "array", + "description": "List of numbers", + }, + "precision": { + "type": "integer", + "description": "Decimal precision", + }, + }, + }, + ), + ), + ] + + def test_has_tool_call_positive(self): + """Test detection of Qwen3-coder XML tool calls.""" + text_with_tool = """Some text before + + + +Beijing + + + +Some text after""" + + assert self.processor.has_tool_call(text_with_tool) is True + + def test_has_tool_call_negative(self): + """Test when no tool calls are present.""" + text_without_tool = "This is just regular text with no tool calls." 
+ + assert self.processor.has_tool_call(text_without_tool) is False + + def test_parse_single_tool_call(self): + """Test parsing a single Qwen3-coder XML tool call.""" + xml_text = """ + + +Beijing + + +2024-06-27 + + +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 1 + assert isinstance(result[0], ToolCall) + assert result[0].function.name == "get_weather" + + arguments = json.loads(result[0].function.arguments) + assert arguments["city"] == "Beijing" + assert arguments["date"] == "2024-06-27" + + def test_parse_multiple_tool_calls(self): + """Test parsing multiple Qwen3-coder XML tool calls.""" + xml_text = """ + + +Beijing + + +2024-06-27 + + + + + + + +[1, 2, 3, 4, 5] + + +2 + + +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 2 + + # First tool call + assert result[0].function.name == "get_weather" + args1 = json.loads(result[0].function.arguments) + assert args1["city"] == "Beijing" + assert args1["date"] == "2024-06-27" + + # Second tool call + assert result[1].function.name == "calculate_sum" + args2 = json.loads(result[1].function.arguments) + assert args2["numbers"] == [1, 2, 3, 4, 5] + assert args2["precision"] == 2 + + def test_parse_with_multiline_parameters(self): + """Test parsing Qwen3-coder XML tool calls with multi-line parameter values.""" + xml_text = """ + + +Beijing + + +This is a multi-line +parameter value that spans +multiple lines + + +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 1 + arguments = json.loads(result[0].function.arguments) + assert arguments["city"] == "Beijing" + assert "multi-line" in arguments["description"] + assert "multiple lines" in arguments["description"] + + def test_parse_with_json_values(self): + """Test parsing Qwen3-coder XML tool calls with JSON-formatted parameters.""" + xml_text = """ + + +[10, 20, 30] + + +3 + + +""" + + result = 
self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 1 + arguments = json.loads(result[0].function.arguments) + assert arguments["numbers"] == [10, 20, 30] + assert arguments["precision"] == 3 + + def test_parse_with_surrounding_text(self): + """Test parsing Qwen3-coder XML tool calls with surrounding text.""" + xml_text = """I need to check the weather and do some calculations. + + + + +Shanghai + + +metric + + + + +Let me also calculate something: + + + + +[5, 10, 15] + + + + +That should do it.""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + assert len(result) == 2 + assert result[0].function.name == "get_weather" + assert result[1].function.name == "calculate_sum" + + def test_parse_malformed_xml(self): + """Test handling of malformed Qwen3-coder XML.""" + malformed_xml = """ + + +Beijing + + +2024-06-27 + +""" # Missing closing tag for parameter + + result = self.processor.parse_xml_to_json(malformed_xml, self.sample_tools) + + # Should still parse the function but may miss malformed parameters + assert len(result) == 1 + assert result[0].function.name == "get_weather" + + def test_empty_input(self): + """Test parsing empty input.""" + result = self.processor.parse_xml_to_json("", self.sample_tools) + assert len(result) == 0 + + def test_no_matching_tools(self): + """Test parsing with no matching tools in the tool list.""" + xml_text = """ + + +value + + +""" + + result = self.processor.parse_xml_to_json(xml_text, self.sample_tools) + + # Should still parse but with no type validation + assert len(result) == 1 + assert result[0].function.name == "unknown_function" + + +class TestBaseXMLToolCallProcessor: + """Test base XML tool call processor functionality.""" + + def test_parse_arguments_json(self): + """Test parsing JSON-formatted argument values.""" + processor = GLM45ToolCallProcessor() # Use concrete implementation + + # Test JSON parsing + result, success = 
processor._parse_arguments('{"key": "value"}') + assert success is True + assert result == {"key": "value"} + + # Test array parsing + result, success = processor._parse_arguments("[1, 2, 3]") + assert success is True + assert result == [1, 2, 3] + + # Test number parsing + result, success = processor._parse_arguments("42") + assert success is True + assert result == 42 + + def test_parse_arguments_literal(self): + """Test parsing literal argument values.""" + processor = GLM45ToolCallProcessor() + + # Test string that can't be parsed as JSON + result, success = processor._parse_arguments("simple_string") + assert success is False + assert result == "simple_string" + + def test_get_argument_type(self): + """Test getting argument type from tool definition.""" + processor = GLM45ToolCallProcessor() + tools = [ + ToolSpec( + type="function", + function=Function( + name="test_func", + description="Test function", + parameters={ + "type": "object", + "properties": { + "str_param": {"type": "string"}, + "int_param": {"type": "integer"}, + }, + }, + ), + ) + ] + + assert processor._get_argument_type("test_func", "str_param", tools) == "string" + assert ( + processor._get_argument_type("test_func", "int_param", tools) == "integer" + ) + assert processor._get_argument_type("test_func", "unknown_param", tools) is None + assert processor._get_argument_type("unknown_func", "param", tools) is None + + +if __name__ == "__main__": + pytest.main([__file__])