Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,6 @@ API_KEY=默认使用通义千问,apikey通过百炼模型平台获取
COOKIES_STR=your_cookies_here
MODEL_BASE_URL=https://dashscope.aliyuncs.com/compatible-mode/v1
MODEL_NAME=qwen-max
TOGGLE_KEYWORDS=。
TOGGLE_KEYWORDS=。
ENABLE_INTENT=1
MAX_USER_HISTORY=5
98 changes: 33 additions & 65 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,70 +1,38 @@
FROM python:3.10-alpine AS builder

WORKDIR /app

# 只安装构建所需的依赖
RUN apk add --no-cache --virtual .build-deps \
gcc \
musl-dev \
libffi-dev \
build-base \
curl

# 创建虚拟环境并安装依赖
RUN python -m venv /opt/venv
ENV PATH="/opt/venv/bin:$PATH"

# 复制依赖文件并安装
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# 第二阶段:最终镜像
FROM python:3.10-alpine

# 添加元数据标签
LABEL maintainer="coderxiu<[email protected]>"
LABEL description="闲鱼AI客服机器人"
LABEL version="1.0"

# 设置时区和编码
ENV TZ=Asia/Shanghai \
PYTHONIOENCODING=utf-8 \
LANG=C.UTF-8 \
PATH="/opt/venv/bin:$PATH" \
PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1

# 只安装运行时必要的包
RUN apk add --no-cache \
tzdata \
nodejs \
npm \
&& npm install -g [email protected] \
&& npm cache clean --force \
&& ln -snf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
&& echo Asia/Shanghai > /etc/timezone \
# 减小apk缓存
&& rm -rf /var/cache/apk/*

# 设置工作目录
FROM python:3.8

# ---------------------------
# 1. 设置国内Apt源,加速系统包安装
# ---------------------------
RUN echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian stable main contrib non-free" > /etc/apt/sources.list && \
echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian stable-updates main contrib non-free" >> /etc/apt/sources.list && \
echo "deb https://mirrors.tuna.tsinghua.edu.cn/debian-security stable-security main contrib non-free" >> /etc/apt/sources.list && \
rm -rf /etc/apt/sources.list.d/* && \
apt-get update && \
apt-get install -y curl gnupg && \
# ---------------------------
# 2. 安装Node.js 18(官方推荐方式)
# ---------------------------
curl -fsSL https://deb.nodesource.com/setup_18.x | bash - && \
apt-get install -y nodejs && \
node -v && npm -v && \
apt-get clean && rm -rf /var/lib/apt/lists/*

# ---------------------------
# 3. 用清华PyPI源加速pip,全局配置
# ---------------------------
RUN mkdir -p /root/.pip && \
echo '[global]\nindex-url = https://pypi.tuna.tsinghua.edu.cn/simple' > /root/.pip/pip.conf

# 设置容器内工作目录
WORKDIR /app

# 从构建阶段复制虚拟环境
COPY --from=builder /opt/venv /opt/venv

# 创建必要的目录
RUN mkdir -p data prompts

# 复制示例提示词文件并重命名为正式文件
COPY prompts/classify_prompt_example.txt prompts/classify_prompt.txt
COPY prompts/price_prompt_example.txt prompts/price_prompt.txt
COPY prompts/tech_prompt_example.txt prompts/tech_prompt.txt
COPY prompts/default_prompt_example.txt prompts/default_prompt.txt
# 复制所有项目文件到镜像
COPY . .

# 只复制绝对必要的文件
COPY main.py XianyuAgent.py XianyuApis.py context_manager.py ./
COPY utils/ utils/
COPY static/ static/
# ---------------------------
# 4. 升级pip、安装Python依赖(走清华PyPI源)
# ---------------------------
RUN pip install --upgrade pip && pip install -r requirements.txt

# 容器启动时运行的命令
# 启动主程序
CMD ["python", "main.py"]
126 changes: 58 additions & 68 deletions XianyuAgent.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import os
from openai import OpenAI
from loguru import logger

import time

class XianyuReplyBot:
def __init__(self):
# 意图识别开关,取值见 .env: ENABLE_INTENT=False/True/0/1
self.max_user_history = int(os.getenv("MAX_USER_HISTORY", "5"))
self.enable_intent = str(os.getenv("ENABLE_INTENT", "1")).lower() in ("1", "true", "yes")
# 初始化OpenAI客户端
self.client = OpenAI(
api_key=os.getenv("API_KEY"),
Expand All @@ -17,11 +20,10 @@ def __init__(self):
self.router = IntentRouter(self.agents['classify'])
self.last_intent = None # 记录最后一次意图


def _init_agents(self):
"""初始化各领域Agent"""
self.agents = {
'classify':ClassifyAgent(self.client, self.classify_prompt, self._safe_filter),
'classify': ClassifyAgent(self.client, self.classify_prompt, self._safe_filter),
'price': PriceAgent(self.client, self.price_prompt, self._safe_filter),
'tech': TechAgent(self.client, self.tech_prompt, self._safe_filter),
'default': DefaultAgent(self.client, self.default_prompt, self._safe_filter),
Expand All @@ -30,28 +32,19 @@ def _init_agents(self):
def _init_system_prompts(self):
"""初始化各Agent专用提示词,直接从文件中加载"""
prompt_dir = "prompts"

try:
# 加载分类提示词
with open(os.path.join(prompt_dir, "classify_prompt.txt"), "r", encoding="utf-8") as f:
self.classify_prompt = f.read()
logger.debug(f"已加载分类提示词,长度: {len(self.classify_prompt)} 字符")

# 加载价格提示词
with open(os.path.join(prompt_dir, "price_prompt.txt"), "r", encoding="utf-8") as f:
self.price_prompt = f.read()
logger.debug(f"已加载价格提示词,长度: {len(self.price_prompt)} 字符")

# 加载技术提示词
with open(os.path.join(prompt_dir, "tech_prompt.txt"), "r", encoding="utf-8") as f:
self.tech_prompt = f.read()
logger.debug(f"已加载技术提示词,长度: {len(self.tech_prompt)} 字符")

# 加载默认提示词
with open(os.path.join(prompt_dir, "default_prompt.txt"), "r", encoding="utf-8") as f:
self.default_prompt = f.read()
logger.debug(f"已加载默认提示词,长度: {len(self.default_prompt)} 字符")

logger.info("成功加载所有提示词")
except Exception as e:
logger.error(f"加载提示词时出错: {e}")
Expand All @@ -63,64 +56,73 @@ def _safe_filter(self, text: str) -> str:
return "[安全提醒]请通过平台沟通" if any(p in text for p in blocked_phrases) else text

def format_history(self, context: List[Dict]) -> str:
    """Format the most recent conversation rounds as plain text.

    Only ``user`` and ``assistant`` messages are considered; every other
    role is ignored. A "round" is one user message plus the assistant
    message that immediately follows it, if there is one. The number of
    rounds kept is ``self.max_user_history``.

    Args:
        context: Conversation history; each entry is a dict with at least
            the keys ``role`` and ``content``.

    Returns:
        One ``"<role>: <content>"`` line per selected message, joined with
        newlines in their original order. An empty string is returned when
        nothing is selected or when ``self.max_user_history`` is not
        positive.
    """
    max_rounds = self.max_user_history
    if max_rounds <= 0:
        # ``seq[-0:]`` is the whole sequence and a negative limit would
        # slice from the front, so a non-positive limit must be handled
        # explicitly: zero rounds requested means no history at all.
        return ""

    dialogue = [msg for msg in context if msg['role'] in ('user', 'assistant')]
    user_positions = [i for i, msg in enumerate(dialogue) if msg['role'] == 'user']

    selected = []
    # Slicing already copes with fewer than ``max_rounds`` user messages.
    for pos in user_positions[-max_rounds:]:
        selected.append(dialogue[pos])
        reply_pos = pos + 1
        # Keep the reply only when it directly follows the user message.
        if reply_pos < len(dialogue) and dialogue[reply_pos]['role'] == 'assistant':
            selected.append(dialogue[reply_pos])

    return "\n".join(f"{msg['role']}: {msg['content']}" for msg in selected)

internal_intents = {'classify'} # 定义不对外开放的Agent
def generate_reply(self, user_msg: str, item_desc: str, context: List[Dict]) -> str:
"""生成回复主流程"""
formatted_context = self.format_history(context)

if detected_intent in self.agents and detected_intent not in internal_intents:
agent = self.agents[detected_intent]
logger.info(f'意图识别完成: {detected_intent}')
self.last_intent = detected_intent # 保存当前意图
else:
if not self.enable_intent:
# 关闭意图识别:恒定使用 default,无议价
agent = self.agents['default']
logger.info(f'意图识别完成: default')
self.last_intent = 'default' # 保存当前意图

# 3. 获取议价次数
bargain_count = self._extract_bargain_count(context)
logger.info(f'议价次数: {bargain_count}')

# 4. 生成回复
bargain_count = 0
logger.info('[意图识别已关闭] 使用default agent,无议价')
else:
# 开启意图识别流程
detected_intent = self.router.detect(user_msg, item_desc, formatted_context)
internal_intents = {'classify'} # 内部agent不对外
if detected_intent in self.agents and detected_intent not in internal_intents:
agent = self.agents[detected_intent]
logger.info(f'意图识别完成: {detected_intent}')
self.last_intent = detected_intent
else:
agent = self.agents['default']
logger.info(f'意图识别完成: default')
self.last_intent = 'default'
bargain_count = self._extract_bargain_count(context)
logger.info(f'议价次数: {bargain_count}')

return agent.generate(
user_msg=user_msg,
item_desc=item_desc,
context=formatted_context,
bargain_count=bargain_count
)

def _extract_bargain_count(self, context: List[Dict]) -> int:
"""
从上下文中提取议价次数信息

Args:
context: 对话历史

Returns:
int: 议价次数,如果没有找到则返回0
"""
# 查找系统消息中的议价次数信息
for msg in context:
if msg['role'] == 'system' and '议价次数' in msg['content']:
try:
# 提取议价次数
match = re.search(r'议价次数[::]\s*(\d+)', msg['content'])
if match:
return int(match.group(1))
Expand All @@ -144,7 +146,7 @@ def __init__(self, classify_agent):
'tech': { # 技术类优先判定
'keywords': ['参数', '规格', '型号', '连接', '对比'],
'patterns': [
r'和.+比'
r'和.+比'
]
},
'price': {
Expand All @@ -157,31 +159,24 @@ def __init__(self, classify_agent):
def detect(self, user_msg: str, item_desc, context) -> str:
"""三级路由策略(技术优先)"""
text_clean = re.sub(r'[^\w\u4e00-\u9fa5]', '', user_msg)

# 1. 技术类关键词优先检查
if any(kw in text_clean for kw in self.rules['tech']['keywords']):
# logger.debug(f"技术类关键词匹配: {[kw for kw in self.rules['tech']['keywords'] if kw in text_clean]}")
return 'tech'

# 2. 技术类正则优先检查
for pattern in self.rules['tech']['patterns']:
if re.search(pattern, text_clean):
# logger.debug(f"技术类正则匹配: {pattern}")
return 'tech'

# 3. 价格类检查
for intent in ['price']:
if any(kw in text_clean for kw in self.rules[intent]['keywords']):
# logger.debug(f"价格类关键词匹配: {[kw for kw in self.rules[intent]['keywords'] if kw in text_clean]}")
return intent

for pattern in self.rules[intent]['patterns']:
if re.search(pattern, text_clean):
# logger.debug(f"价格类正则匹配: {pattern}")
return intent

# 4. 大模型兜底
# logger.debug("使用大模型进行意图分类")
return self.classify_agent.generate(
user_msg=user_msg,
item_desc=item_desc,
Expand Down Expand Up @@ -211,21 +206,22 @@ def _build_messages(self, user_msg: str, item_desc: str, context: str) -> List[D
]

def _call_llm(self, messages: List[Dict], temperature: float = 0.4) -> str:
    """Send ``messages`` to the chat-completions endpoint and return the reply text.

    The model name is read from the ``MODEL_NAME`` environment variable on
    every call, falling back to ``qwen-max``. ``max_tokens`` is fixed at
    500 and ``top_p`` at 0.8. The duration of the request is logged at
    INFO level.

    Args:
        messages: Chat messages, passed through unchanged to the client.
        temperature: Sampling temperature for this request.

    Returns:
        The text content of the first choice, or an empty string when the
        response carries no text content.
    """
    # perf_counter is monotonic, so the measured duration is not skewed
    # by wall-clock adjustments while the request is in flight.
    started = time.perf_counter()
    response = self.client.chat.completions.create(
        model=os.getenv("MODEL_NAME", "qwen-max"),
        messages=messages,
        temperature=temperature,
        max_tokens=500,
        top_p=0.8,
    )
    logger.info(f"LLM调用耗时: {time.perf_counter() - started:.2f}秒")
    # ``message.content`` is nullable in the API schema; normalise it so
    # the declared ``str`` return type always holds.
    return response.choices[0].message.content or ""


class PriceAgent(BaseAgent):
"""议价处理Agent"""

def generate(self, user_msg: str, item_desc: str, context: str, bargain_count: int=0) -> str:
def generate(self, user_msg: str, item_desc: str, context: str, bargain_count: int = 0) -> str:
"""重写生成逻辑"""
dynamic_temp = self._calc_temperature(bargain_count)
messages = self._build_messages(user_msg, item_desc, context)
Expand All @@ -247,17 +243,16 @@ def _calc_temperature(self, bargain_count: int) -> float:

class TechAgent(BaseAgent):
"""技术咨询Agent"""
def generate(self, user_msg: str, item_desc: str, context: str, bargain_count: int=0) -> str:
def generate(self, user_msg: str, item_desc: str, context: str, bargain_count: int = 0) -> str:
"""重写生成逻辑"""
messages = self._build_messages(user_msg, item_desc, context)
# messages[0]['content'] += "\n▲知识库:\n" + self._fetch_tech_specs()

response = self.client.chat.completions.create(
model=os.getenv("MODEL_NAME", "qwen-max"),
messages=messages,
temperature=0.4,
temperature=0.8,
max_tokens=500,
top_p=0.8,
top_p=1,
extra_body={
"enable_search": True,
}
Expand All @@ -266,11 +261,6 @@ def generate(self, user_msg: str, item_desc: str, context: str, bargain_count: i
return self.safety_filter(response.choices[0].message.content)


# def _fetch_tech_specs(self) -> str:
# """模拟获取技术参数(可连接数据库)"""
# return "功率:200W@8Ω\n接口:XLR+RCA\n频响:20Hz-20kHz"


class ClassifyAgent(BaseAgent):
"""意图识别Agent"""

Expand All @@ -285,4 +275,4 @@ class DefaultAgent(BaseAgent):
def _call_llm(self, messages: List[Dict], *args) -> str:
    """Delegate to the parent ``_call_llm`` with the temperature pinned to 0.7.

    Any extra positional arguments are accepted but ignored, so a
    temperature supplied by the caller has no effect here.
    """
    return super()._call_llm(messages, temperature=0.7)
Loading