theroyallab · bold84 · Aug 25, 2025 · Aug 25, 2025 · Aug 25, 2025 · Aug 26, 2025
diff --git a/common/templating.py b/common/templating.py
@@ -30,6 +30,9 @@ class TemplateMetadata:
 
     stop_strings: List[str] = field(default_factory=list)
     tool_start: Optional[str] = None
+    tool_end: Optional[str] = None
+    tool_call_format: str = "json"  # "json" or "xml"
+    xml_processor_type: Optional[str] = None  # "glm45", "custom", etc.
 
 
 class PromptTemplate:
@@ -76,6 +79,20 @@ async def extract_metadata(self, template_vars: dict):
             if isinstance(template_module.tool_start, str):
                 template_metadata.tool_start = template_module.tool_start
 
+        if hasattr(template_module, "tool_end"):
+            if isinstance(template_module.tool_end, str):
+                template_metadata.tool_end = template_module.tool_end
+
+        if hasattr(template_module, "tool_call_format"):
+            if isinstance(template_module.tool_call_format, str):
+                template_metadata.tool_call_format = template_module.tool_call_format
+
+        if hasattr(template_module, "xml_processor_type"):
+            if isinstance(template_module.xml_processor_type, str):
+                template_metadata.xml_processor_type = (
+                    template_module.xml_processor_type
+                )
+
         self.metadata = template_metadata
         return template_metadata
 

diff --git a/docs/XML-Tool-Calling-Implementation.md b/docs/XML-Tool-Calling-Implementation.md
@@ -0,0 +1,311 @@
+# XML Tool Calling Implementation for TabbyAPI
+
+This document describes the XML-based tool calling support implemented for GLM-4.5 and Qwen3-coder models in TabbyAPI.
+
+## Overview
+
+Some models (GLM-4.5, Qwen3-coder) generate tool calls in an XML format rather than the OpenAI JSON format that TabbyAPI expects. This implementation provides generic XML tool call processing that converts each supported XML tool call format into the OpenAI-compatible JSON format.
+
+## Architecture
+
+### Components
+
+1. **BaseXMLToolCallProcessor** (`endpoints/OAI/utils/xml_tool_processors.py`)
+   - Abstract base class for XML tool call processors
+   - Provides common functionality for parsing and converting tool calls
+   - Extensible design allows support for other XML-based models
+
+2. **GLM45ToolCallProcessor** (`endpoints/OAI/utils/xml_tool_processors.py`)
+   - Concrete implementation for GLM-4.5 specific XML format
+   - Handles the `<tool_call>` and `<arg_key>/<arg_value>` structure
+   - Converts XML to OpenAI JSON format
+
+3. **Qwen3CoderToolCallProcessor** (`endpoints/OAI/utils/xml_tool_processors.py`)
+   - Concrete implementation for Qwen3-coder specific XML format
+   - Handles nested `<tool_call><function=name><parameter=name>value</parameter></function></tool_call>` structure
+   - Supports multi-line parameter values
+   - Converts XML to OpenAI JSON format
+
+4. **XMLToolCallProcessorFactory** (`endpoints/OAI/utils/xml_tool_processors.py`)
+   - Factory class for creating appropriate XML processors
+   - Supports GLM-4.5 ("glm45", "glm-4.5", "glm4") and Qwen3-coder ("qwen3-coder", "qwen3") processors
+   - Supports extensibility by allowing registration of new processor types
+
+5. **Enhanced TemplateMetadata** (`common/templating.py`)
+   - Extended to support XML tool call configuration
+   - New fields: `tool_call_format`, `xml_processor_type`, `tool_end`
+
+6. **Enhanced ToolCallProcessor** (`endpoints/OAI/utils/tools.py`)
+   - Added `from_text()` method that routes to appropriate processor
+   - Added `from_xml()` method for XML-specific processing
+   - Maintains backward compatibility with JSON processing
+
+### Supported XML Formats
+
+#### GLM-4.5 XML Format
+
+The GLM-4.5 model generates tool calls in this format:
+
+```xml
+<tool_call>function_name
+<arg_key>parameter1</arg_key>
+<arg_value>value1</arg_value>
+<arg_key>parameter2</arg_key>
+<arg_value>value2</arg_value>
+</tool_call>
+```
+
+#### Qwen3-coder XML Format
+
+The Qwen3-coder model generates tool calls in this nested format:
+
+```xml
+<tool_call>
+<function=function_name>
+<parameter=parameter1>
+value1
+</parameter>
+<parameter=parameter2>
+This is a multi-line
+parameter value that spans
+multiple lines
+</parameter>
+</function>
+</tool_call>
+```
+
+Both formats are converted to the OpenAI JSON tool call format (the values below correspond to the GLM-4.5 example above):
+
+```json
+{
+  "id": "call_12345",
+  "type": "function",
+  "function": {
+    "name": "function_name",
+    "arguments": "{\"parameter1\": \"value1\", \"parameter2\": \"value2\"}"
+  }
+}
+```
+
+## Usage
+
+### Template Configuration
+
+#### GLM-4.5 Template
+
+The GLM-4.5 template (`templates/tool_calls/glm-4p5-chat-template-tabbyapi.jinja`) includes:
+
+```jinja
+{# Metadata #}
+{%- set stop_strings = ["<|user|>", "<|assistant|>", "<|observation|>", "<|system|>"] -%}
+{%- set tool_start = "<tool_call>" -%}
+{%- set tool_end = "</tool_call>" -%}
+{%- set tool_call_format = "xml" -%}
+{%- set xml_processor_type = "glm45" -%}
+```
+
+#### Qwen3-coder Template
+
+The Qwen3-coder template (`templates/tool_calls/qwen3-coder-tabbyapi.jinja`) includes:
+
+```jinja
+{# XML Tool Call Processing Configuration #}
+{%- set tool_call_format = "xml" -%}
+{%- set xml_processor_type = "qwen3-coder" -%}
+```
+
+### Loading Models
+
+#### GLM-4.5 Models
+
+When loading a GLM-4.5 model, specify the tool-calling template:
+
+```yaml
+# config.yml
+model:
+  model_name: "path/to/glm-4.5-model"
+  prompt_template: "tool_calls/glm-4p5-chat-template-tabbyapi"
+```
+
+#### Qwen3-coder Models
+
+When loading a Qwen3-coder model, specify the tool-calling template:
+
+```yaml
+# config.yml
+model:
+  model_name: "path/to/qwen3-coder-model"
+  prompt_template: "tool_calls/qwen3-coder-tabbyapi"
+```
+
+Or via API:
+
+```bash
+# GLM-4.5
+curl -X POST "http://localhost:5000/v1/model/load" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "path/to/glm-4.5-model",
+    "prompt_template": "tool_calls/glm-4p5-chat-template-tabbyapi"
+  }'
+
+# Qwen3-coder
+curl -X POST "http://localhost:5000/v1/model/load" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "name": "path/to/qwen3-coder-model",
+    "prompt_template": "tool_calls/qwen3-coder-tabbyapi"
+  }'
+```
+
+### Tool Call Request
+
+Standard OpenAI-compatible tool calling request:
+
+```json
+{
+  "model": "glm-4.5",
+  "messages": [
+    {
+      "role": "user",
+      "content": "What's the weather in Beijing?"
+    }
+  ],
+  "tools": [
+    {
+      "type": "function",
+      "function": {
+        "name": "get_weather",
+        "description": "Get weather information",
+        "parameters": {
+          "type": "object",
+          "properties": {
+            "city": {
+              "type": "string",
+              "description": "City name"
+            },
+            "date": {
+              "type": "string",
+              "description": "Date in YYYY-MM-DD format"
+            }
+          },
+          "required": ["city"]
+        }
+      }
+    }
+  ]
+}
+```
+
+## Integration Flow
+
+1. **Template Processing**: The template metadata indicates XML format tool calls
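+2. **Model Generation**: The model (GLM-4.5 or Qwen3-coder) generates XML tool calls when the `<tool_call>` trigger is detected
+3. **XML Parsing**: The processor selected by `xml_processor_type` (e.g. `GLM45ToolCallProcessor` or `Qwen3CoderToolCallProcessor`) parses the XML structure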
+4. **JSON Conversion**: XML is converted to OpenAI-compatible JSON format
+5. **Standard Pipeline**: Converted tool calls flow through normal TabbyAPI processing
+
+## Extensibility
+
+### Adding New XML Processors
+
+To support other XML-based models:
+
+1. Create a new processor class extending `BaseXMLToolCallProcessor`
+2. Implement the required methods for the specific XML format
+3. Register the processor with the factory:
+
+```python
+# Custom processor
+class CustomXMLProcessor(BaseXMLToolCallProcessor):
+    def has_tool_call(self, text: str) -> bool:
+        return "<custom_tool>" in text
+
+    def parse_xml_to_json(self, text: str, tools: List[Tool]) -> List[ToolCall]:
+        # Custom parsing logic
+        pass
+
+# Register processor
+XMLToolCallProcessorFactory.register_processor("custom", CustomXMLProcessor)
+```
+
+### Template Configuration
+
+Create a template with appropriate metadata:
+
+```jinja
+{%- set tool_call_format = "xml" -%}
+{%- set xml_processor_type = "custom" -%}
+{%- set tool_start = "<custom_tool>" -%}
+{%- set tool_end = "</custom_tool>" -%}
+```
+
+## Testing
+
+Unit tests are provided in `tests/test_xml_tool_calls.py` covering:
+
+- XML parsing functionality
+- Multiple tool call handling
+- JSON conversion accuracy
+- Error handling for malformed XML
+- Factory pattern functionality
+- Argument type processing
+
+Run tests with:
+
+```bash
+python -m pytest tests/test_xml_tool_calls.py -v
+```
+
+## Error Handling
+
+The implementation includes robust error handling:
+
+- **Malformed XML**: Returns empty tool call list, logs error
+- **Unknown Functions**: Still processes but without type validation
+- **Parsing Failures**: Falls back gracefully, maintains system stability
+- **Missing Dependencies**: Graceful degradation to JSON processing
+
+## Performance Considerations
+
+- **Regex-based Parsing**: Efficient for typical tool call volumes
+- **Lazy Evaluation**: Processors created only when needed
+- **Memory Efficient**: Processes tool calls incrementally
+- **Caching**: Template metadata cached after first extraction
+
+## Compatibility
+
+- **Backward Compatible**: Existing JSON tool calling continues to work
+- **OpenAI Standard**: Output format matches OpenAI API specification
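+- **Streaming Support**: Works with both streaming and non-streaming responses (note: in streaming mode the XML processor is called with an empty tool list, so any processing that relies on the request's tool definitions, such as argument type validation, is not applied)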
+- **Multi-tool**: Supports multiple tool calls in single response
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Tool calls not detected**
+   - Verify template has `tool_call_format = "xml"`
+   - Check `tool_start` matches model output
+   - Ensure `xml_processor_type` is correct
+
+2. **Parsing errors**
+   - Validate XML format matches expected structure
+   - Check for missing closing tags
+   - Verify argument key/value pairing
+
+3. **JSON conversion failures**
+   - Check argument types in tool definitions
+   - Validate JSON-formatted argument values
+   - Review error logs for specific parsing issues
+
+### Debug Mode
+
+Enable detailed logging for troubleshooting:
+
+```python
+import logging
+logging.getLogger("endpoints.OAI.utils.xml_tool_processors").setLevel(logging.DEBUG)
+```
+
+This implementation provides a robust, extensible foundation for XML-based tool calling in TabbyAPI while maintaining full compatibility with existing JSON-based tool calling functionality.
diff --git a/endpoints/OAI/utils/chat_completion.py b/endpoints/OAI/utils/chat_completion.py
@@ -27,13 +27,17 @@
     ChatCompletionResponse,
     ChatCompletionStreamChoice,
 )
+from endpoints.OAI.types.tools import ToolSpec
 from endpoints.OAI.types.common import UsageStats
 from endpoints.OAI.utils.completion import _parse_gen_request_id, _stream_collector
 from endpoints.OAI.utils.tools import ToolCallProcessor, TOOL_CALL_SCHEMA
 
 
 def _create_response(
-    request_id: str, generations: List[dict], model_name: Optional[str]
+    request_id: str,
+    generations: List[dict],
+    model_name: Optional[str],
+    tools: Optional[List[ToolSpec]] = None,
 ):
     """Create a chat completion response from the provided text."""
 
@@ -144,9 +148,21 @@ def _create_stream_chunk(
         # Mark finish_reason as tool_calls since this is the last chunk
         if "tool_calls" in generation:
             tool_calls = generation["tool_calls"]
-            message = ChatCompletionMessage(
-                tool_calls=ToolCallProcessor.from_json(tool_calls)
-            )
+            # Get template metadata for tool call processing
+            template_metadata = model.container.prompt_template.metadata
+            if template_metadata and template_metadata.tool_call_format == "xml":
+                # Use XML processor for XML-based tool calls
+                processed_tool_calls = ToolCallProcessor.from_text(
+                    tool_calls,
+                    [],  # We don't have tools context in streaming
+                    tool_call_format="xml",
+                    xml_processor_type=template_metadata.xml_processor_type,
+                )
+            else:
+                # Default to JSON processor
+                processed_tool_calls = ToolCallProcessor.from_json(tool_calls)
+
+            message = ChatCompletionMessage(tool_calls=processed_tool_calls)
             choice.delta = message
             choice.finish_reason = "tool_calls"
 
@@ -445,7 +461,9 @@ async def generate_chat_completion(
                 prompt, embeddings, data, generations, request
             )
 
-        response = _create_response(request.state.id, generations, model_path.name)
+        response = _create_response(
+            request.state.id, generations, model_path.name, data.tools
+        )
 
         logger.info(f"Finished chat completion request {request.state.id}")