Skip to content

Commit 842bfd4

Browse files
committed
Merge PR 666ghj#463: multi-provider LLM support via Prompture
# Conflicts: # .env.example # backend/app/utils/llm_client.py
2 parents dd671cb + 25909cc commit 842bfd4

File tree

5 files changed

+288
-62
lines changed

5 files changed

+288
-62
lines changed

.env.example

Lines changed: 38 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,44 @@
1-
# LLM API配置(支持 OpenAI SDK 格式的任意 LLM API)
2-
# 推荐使用阿里百炼平台qwen-plus模型:https://bailian.console.aliyun.com/
3-
# 注意消耗较大,可先进行小于40轮的模拟尝试
1+
# ===== LLM API Configuration =====
2+
# Default: any OpenAI-compatible API
3+
# With Prompture installed (pip install prompture): 12+ providers supported
4+
#
5+
# ── OpenAI-compatible (default, no Prompture needed) ──
46
LLM_API_KEY=your_api_key_here
57
LLM_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
68
LLM_MODEL_NAME=qwen-plus
9+
#
10+
# ── With Prompture: use "provider/model" format ──
11+
# LM Studio (free, local):
12+
# LLM_MODEL_NAME=lmstudio/local-model
13+
# LLM_BASE_URL=http://localhost:1234/v1
14+
# LLM_API_KEY=lm-studio
15+
#
16+
# Ollama (free, local):
17+
# LLM_MODEL_NAME=ollama/llama3.1:8b
18+
#
19+
# Kimi / Moonshot:
20+
# LLM_MODEL_NAME=moonshot/moonshot-v1-8k
21+
# LLM_API_KEY=your_moonshot_key
22+
#
23+
# Claude:
24+
# LLM_MODEL_NAME=claude/claude-sonnet-4-20250514
25+
# LLM_API_KEY=sk-ant-...
26+
#
27+
# Groq (fast, free tier):
28+
# LLM_MODEL_NAME=groq/llama-3.1-70b-versatile
29+
# LLM_API_KEY=gsk_...
30+
#
31+
# See all providers: https://github.com/jhd3197/prompture#providers
732

8-
# ===== ZEP记忆图谱配置 =====
9-
# 每月免费额度即可支撑简单使用:https://app.getzep.com/
33+
# ===== ZEP Memory Graph =====
34+
# Free monthly quota: https://app.getzep.com/
1035
ZEP_API_KEY=your_zep_api_key_here
1136

12-
# ===== 加速 LLM 配置(可选)=====
13-
# 注意如果不使用加速配置,env文件中就不要出现下面的配置项
14-
LLM_BOOST_API_KEY=your_api_key_here
15-
LLM_BOOST_BASE_URL=your_base_url_here
16-
LLM_BOOST_MODEL_NAME=your_model_name_here
17-
# ===== 前端API超时配置(可选)=====
18-
# 本地大模型响应较慢时可以增加此值(毫秒)
19-
# VITE_API_TIMEOUT=600000 # 10分钟
37+
# ===== Boost LLM (optional) =====
38+
# LLM_BOOST_API_KEY=your_api_key_here
39+
# LLM_BOOST_BASE_URL=your_base_url_here
40+
# LLM_BOOST_MODEL_NAME=your_model_name_here
41+
42+
# ===== Frontend API timeout (optional) =====
43+
# Increase this value for slow local LLMs (milliseconds)
44+
# VITE_API_TIMEOUT=600000 # 10 minutes

README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,29 @@ LLM_MODEL_NAME=qwen-plus
127127
ZEP_API_KEY=your_zep_api_key
128128
```
129129

130+
#### Multi-Provider Support (Optional)
131+
132+
Install [Prompture](https://github.com/jhd3197/prompture) to unlock 12+ LLM providers beyond OpenAI-compatible APIs:
133+
134+
```bash
135+
pip install prompture
136+
```
137+
138+
Then use `"provider/model"` format in your `.env`:
139+
140+
| Provider | `LLM_MODEL_NAME` | Cost |
141+
|---|---|---|
142+
| LM Studio | `lmstudio/local-model` | Free (local) |
143+
| Ollama | `ollama/llama3.1:8b` | Free (local) |
144+
| OpenAI | `openai/gpt-4o` | Paid |
145+
| Claude | `claude/claude-sonnet-4-20250514` | Paid |
146+
| Kimi / Moonshot | `moonshot/moonshot-v1-8k` | Paid |
147+
| Groq | `groq/llama-3.1-70b-versatile` | Free tier |
148+
| Google | `google/gemini-1.5-pro` | Paid |
149+
| OpenRouter | `openrouter/anthropic/claude-2` | Paid |
150+
151+
> Without Prompture, the original OpenAI SDK backend works as before — no changes needed.
152+
130153
#### 2. Install Dependencies
131154

132155
```bash

backend/app/utils/llm_client.py

Lines changed: 153 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,104 +1,209 @@
11
"""
22
LLM客户端封装
3-
统一使用OpenAI格式调用
3+
Supports two backends:
4+
1. Prompture (optional) — 12+ providers: LM Studio, Ollama, Claude, Groq, Kimi, etc.
5+
2. OpenAI SDK (default fallback) — any OpenAI-compatible API
6+
Install Prompture for multi-provider support: pip install prompture
47
"""
58

69
import json
710
import re
811
from typing import Optional, Dict, Any, List
9-
from openai import OpenAI
1012

1113
from ..config import Config
1214

15+
# Try to import Prompture; fall back to OpenAI SDK if not installed
16+
try:
17+
from prompture.agents import Conversation
18+
from prompture.infra.provider_env import ProviderEnvironment
19+
from prompture.extraction.tools import strip_think_tags, clean_json_text
20+
_HAS_PROMPTURE = True
21+
except ImportError:
22+
_HAS_PROMPTURE = False
23+
24+
if not _HAS_PROMPTURE:
25+
from openai import OpenAI
26+
27+
28+
# Provider name → ProviderEnvironment field name
29+
_KEY_MAP = {
30+
"openai": "openai_api_key",
31+
"claude": "claude_api_key",
32+
"google": "google_api_key",
33+
"groq": "groq_api_key",
34+
"grok": "grok_api_key",
35+
"openrouter": "openrouter_api_key",
36+
"moonshot": "moonshot_api_key",
37+
}
38+
1339

1440
class LLMClient:
15-
"""LLM客户端"""
16-
41+
"""LLM客户端
42+
43+
When Prompture is installed, ``model`` accepts the ``"provider/model"``
44+
format for multi-provider support::
45+
46+
"lmstudio/local-model" → LM Studio (free, local)
47+
"ollama/llama3.1:8b" → Ollama (free, local)
48+
"openai/gpt-4o" → OpenAI
49+
"claude/claude-sonnet-4-20250514" → Anthropic
50+
"moonshot/moonshot-v1-8k" → Kimi / Moonshot
51+
"groq/llama-3.1-70b" → Groq
52+
53+
Without Prompture, the original OpenAI SDK backend is used (any
54+
OpenAI-compatible API via LLM_BASE_URL).
55+
"""
56+
1757
def __init__(
1858
self,
1959
api_key: Optional[str] = None,
2060
base_url: Optional[str] = None,
21-
model: Optional[str] = None
61+
model: Optional[str] = None,
2262
):
2363
self.api_key = api_key or Config.LLM_API_KEY
2464
self.base_url = base_url or Config.LLM_BASE_URL
2565
self.model = model or Config.LLM_MODEL_NAME
26-
66+
67+
if _HAS_PROMPTURE:
68+
self._init_prompture()
69+
else:
70+
self._init_openai()
71+
72+
# ── Prompture backend ──────────────────────────────────────────
73+
74+
def _init_prompture(self):
75+
env_kwargs: Dict[str, Any] = {}
76+
if self.api_key:
77+
provider = self.model.split("/")[0] if "/" in self.model else "openai"
78+
env_field = _KEY_MAP.get(provider)
79+
if env_field:
80+
env_kwargs[env_field] = self.api_key
81+
82+
self._env = ProviderEnvironment(**env_kwargs) if env_kwargs else None
83+
self._driver_options: Dict[str, Any] = {}
84+
if self.base_url:
85+
self._driver_options["base_url"] = self.base_url
86+
87+
def _make_conversation(self, temperature: float, max_tokens: int) -> "Conversation":
88+
opts: Dict[str, Any] = {
89+
"temperature": temperature,
90+
"max_tokens": max_tokens,
91+
**self._driver_options,
92+
}
93+
return Conversation(self.model, options=opts, env=self._env)
94+
95+
# ── OpenAI fallback backend ────────────────────────────────────
96+
97+
def _init_openai(self):
2798
if not self.api_key:
2899
raise ValueError("LLM_API_KEY 未配置")
29-
30-
self.client = OpenAI(
31-
api_key=self.api_key,
32-
base_url=self.base_url
33-
)
34-
100+
self.client = OpenAI(api_key=self.api_key, base_url=self.base_url)
101+
102+
# ── Public API ─────────────────────────────────────────────────
103+
35104
def chat(
36105
self,
37106
messages: List[Dict[str, str]],
38107
temperature: float = 0.7,
39108
max_tokens: int = 4096,
40-
response_format: Optional[Dict] = None
109+
response_format: Optional[Dict] = None,
41110
) -> str:
42111
"""
43112
发送聊天请求
44-
113+
45114
Args:
46115
messages: 消息列表
47116
temperature: 温度参数
48117
max_tokens: 最大token数
49118
response_format: 响应格式(如JSON模式)
50-
119+
51120
Returns:
52121
模型响应文本
53122
"""
54-
kwargs = {
55-
"model": self.model,
56-
"messages": messages,
57-
"temperature": temperature,
58-
"max_tokens": max_tokens,
59-
}
60-
61-
if response_format:
62-
kwargs["response_format"] = response_format
63-
64-
response = self.client.chat.completions.create(**kwargs)
65-
content = response.choices[0].message.content
66-
# 部分模型(如MiniMax M2.5)会在content中包含<think>思考内容,需要移除
67-
content = re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
68-
return content
69-
123+
if _HAS_PROMPTURE:
124+
content = self._chat_prompture(messages, temperature, max_tokens)
125+
return strip_think_tags(content)
126+
else:
127+
content = self._chat_openai(messages, temperature, max_tokens, response_format)
128+
# Fallback: strip think tags with regex when Prompture is not available
129+
return re.sub(r'<think>[\s\S]*?</think>', '', content).strip()
130+
70131
def chat_json(
71132
self,
72133
messages: List[Dict[str, str]],
73134
temperature: float = 0.3,
74-
max_tokens: int = 4096
135+
max_tokens: int = 4096,
75136
) -> Dict[str, Any]:
76137
"""
77138
发送聊天请求并返回JSON
78-
139+
79140
Args:
80141
messages: 消息列表
81142
temperature: 温度参数
82143
max_tokens: 最大token数
83-
144+
84145
Returns:
85146
解析后的JSON对象
86147
"""
87-
response = self.chat(
88-
messages=messages,
89-
temperature=temperature,
90-
max_tokens=max_tokens,
91-
# 不設 response_format 以相容 LM Studio / Ollama 等本地模型
92-
# 依賴 prompt 中的 JSON 指示 + 下方的 markdown 清理邏輯
93-
)
94-
# 清理markdown代码块标记
95-
cleaned_response = response.strip()
96-
cleaned_response = re.sub(r'^```(?:json)?\s*\n?', '', cleaned_response, flags=re.IGNORECASE)
97-
cleaned_response = re.sub(r'\n?```\s*$', '', cleaned_response)
98-
cleaned_response = cleaned_response.strip()
148+
if _HAS_PROMPTURE:
149+
response = self._chat_prompture(messages, temperature, max_tokens)
150+
# Prompture's clean_json_text strips think tags + markdown fences
151+
cleaned = clean_json_text(response)
152+
else:
153+
response = self._chat_openai(
154+
messages, temperature, max_tokens
155+
)
156+
# Fallback cleaning when Prompture is not available
157+
cleaned = re.sub(r'<think>[\s\S]*?</think>', '', response).strip()
158+
cleaned = re.sub(r'^```(?:json)?\s*\n?', '', cleaned, flags=re.IGNORECASE)
159+
cleaned = re.sub(r'\n?```\s*$', '', cleaned)
160+
cleaned = cleaned.strip()
99161

100162
try:
101-
return json.loads(cleaned_response)
163+
return json.loads(cleaned)
102164
except json.JSONDecodeError:
103-
raise ValueError(f"LLM返回的JSON格式无效: {cleaned_response}")
165+
raise ValueError(f"LLM返回的JSON格式无效: {cleaned}")
166+
167+
# ── Private: Prompture path ────────────────────────────────────
168+
169+
def _chat_prompture(
170+
self,
171+
messages: List[Dict[str, str]],
172+
temperature: float,
173+
max_tokens: int,
174+
) -> str:
175+
conv = self._make_conversation(temperature, max_tokens)
176+
177+
# Inject system prompt
178+
system_parts = [m["content"] for m in messages if m["role"] == "system"]
179+
if system_parts:
180+
conv._messages.append({"role": "system", "content": "\n".join(system_parts)})
181+
182+
# Replay prior turns
183+
non_system = [m for m in messages if m["role"] != "system"]
184+
for msg in non_system[:-1]:
185+
conv._messages.append({"role": msg["role"], "content": msg["content"]})
186+
187+
prompt = non_system[-1]["content"] if non_system else ""
188+
return conv.ask(prompt)
104189

190+
# ── Private: OpenAI fallback path ──────────────────────────────
191+
192+
def _chat_openai(
193+
self,
194+
messages: List[Dict[str, str]],
195+
temperature: float,
196+
max_tokens: int,
197+
response_format: Optional[Dict] = None,
198+
) -> str:
199+
kwargs = {
200+
"model": self.model,
201+
"messages": messages,
202+
"temperature": temperature,
203+
"max_tokens": max_tokens,
204+
}
205+
if response_format:
206+
kwargs["response_format"] = response_format
207+
208+
response = self.client.chat.completions.create(**kwargs)
209+
return response.choices[0].message.content

backend/requirements.txt

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,14 @@ flask>=3.0.0
1010
flask-cors>=6.0.0
1111

1212
# ============= LLM 相关 =============
13-
# OpenAI SDK(统一使用 OpenAI 格式调用 LLM)
13+
# OpenAI SDK(默认 LLM 后端)
1414
openai>=1.0.0
1515

16+
# Prompture(可选)— 多供应商 LLM 支持:LM Studio, Ollama, Claude, Groq, Kimi 等
17+
# Install for multi-provider support: pip install prompture
18+
# https://github.com/jhd3197/prompture
19+
# prompture>=0.1.0
20+
1621
# ============= Zep Cloud =============
1722
zep-cloud==3.13.0
1823

0 commit comments

Comments
 (0)