# llm.py
from llama_cpp import Llama
import os

# Resolve the GGUF model path relative to this file.
BASE_DIR = os.path.dirname(__file__)
MODEL_PATH = os.path.join(BASE_DIR, "models", "tinyllama.gguf")

# Load the model once at import time so every call reuses the same context.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=2048,
    n_threads=4,
)
SYSTEM_PROMPT = """
You are a phone call assistant.
STRICT RULES:
- You only generate the assistant reply.
- You NEVER invent caller speech.
- You NEVER answer general knowledge questions.
- You only help take a message.
Your task:
1. Ask caller name
2. Ask reason for calling
3. Acknowledge politely
"""
def generate_reply(state, user_text):
    # Build the prompt from the system rules, the current dialogue step,
    # and the caller's latest utterance.
    prompt = f"""
{SYSTEM_PROMPT}
Current step: {state}
User said: "{user_text}"
Assistant reply:
"""
    # Low temperature and a small token budget keep replies short and predictable.
    out = llm(prompt, max_tokens=60, temperature=0.1)
    text = out["choices"][0]["text"].strip()

    # Safety cleanup: keep only the first line and strip any role labels
    # the model may have echoed back.
    text = text.split("\n")[0]
    text = text.replace("Caller:", "").replace("Assistant:", "").strip()

    if not text:
        return "Sorry, could you repeat that?"
    return text
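

# Illustrative usage sketch (not part of the original file): the state labels
# "ASK_NAME" and "ASK_REASON" are assumed placeholders for whatever dialogue
# state machine calls into this module.
if __name__ == "__main__":
    print(generate_reply("ASK_NAME", "Hi, I'd like to leave a message."))
    print(generate_reply("ASK_REASON", "This is John from the garage."))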