-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathsetup_chat_template.py
More file actions
51 lines (38 loc) · 1.31 KB
/
setup_chat_template.py
File metadata and controls
51 lines (38 loc) · 1.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#!/usr/bin/env python3
"""
Set a tokenizer's chat_template at runtime.
"""
# Fallback Jinja chat template using a plain "### Human: / ### Assistant: "
# layout (this is NOT Qwen's native ChatML format).
#
# Layout notes:
# * The BOS token is emitted once, before the first message, only when the
#   conversation does not start with a system message. It is guarded with
#   `default('', true)` so tokenizers that define no BOS token render an empty
#   string instead of failing on an undefined variable.
# * Role formatting is applied to EVERY message, including the first one, so a
#   conversation that opens with a user turn still gets its
#   "### Human: ... ### Assistant: " markers and an opening assistant turn still
#   gets its EOS token.
# * NOTE(review): the one-tag-per-line layout relies on the renderer dropping
#   the newline after block tags (transformers is documented to compile chat
#   templates with trim_blocks/lstrip_blocks - confirm for the installed
#   version). The newline after each `{{ ... }}` expression is kept as output.
_FALLBACK_CHAT_TEMPLATE = """{% for message in messages %}
{% if loop.first and message['role'] != 'system' %}
{{ bos_token | default('', true) }}{% endif %}
{% if message['role'] == 'system' %}
{{ message['content'] }}
{% elif message['role'] == 'user' %}
{{ '### Human: ' + message['content'] + '\n\n### Assistant: ' }}
{% elif message['role'] == 'assistant' %}
{{ message['content'] + eos_token }}
{% endif %}
{% endfor %}"""


def setup_chat_template(tokenizer):
    """Install a fallback chat_template on ``tokenizer`` if it has none.

    Args:
        tokenizer: Any object exposing (or accepting) a ``chat_template``
            attribute. An existing non-None template is left untouched.

    Returns:
        The same ``tokenizer`` object, mutated in place when a template was
        installed.
    """
    # getattr: treat a tokenizer that lacks the attribute like one set to None.
    if getattr(tokenizer, "chat_template", None) is None:
        tokenizer.chat_template = _FALLBACK_CHAT_TEMPLATE
        print("已设置chat_template")
    return tokenizer
# Usage example: runs only when this file is executed as a script.
if __name__ == "__main__":
    from transformers import AutoTokenizer

    # Local checkpoint directory the tokenizer is loaded from.
    model_dir = "/home/ma-user/work/lilong/download/save_models/Qwen3-4B-Base"

    # Load the tokenizer, then install the fallback template if it has none.
    tokenizer = setup_chat_template(
        AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
    )

    # Smoke test: render a single-turn conversation to text (no tokenization).
    messages = [{"role": "user", "content": "Hello, how are you?"}]
    result = tokenizer.apply_chat_template(messages, tokenize=False)
    print(f"结果: {result}")