@@ -1,61 +1,70 @@
 import os
-from pathlib import Path
 import sys
-from typing import Iterator, List, Optional
-from llama_cpp import Llama
+from typing import Iterator, List, Optional, TypedDict, cast
+from llama_cpp import Completion, CompletionChunk, Llama
 
 from gptcli.completion import CompletionProvider, Message
 
-LLAMA_MODELS: Optional[dict[str, str]] = None
 
+class LLaMAModelConfig(TypedDict):
+    path: str
+    human_prompt: str
+    assistant_prompt: str
 
-def init_llama_models(model_paths: dict[str, str]):
-    for name, path in model_paths.items():
-        if not os.path.isfile(path):
-            print(f"LLaMA model {name} not found at {path}.")
+
+LLAMA_MODELS: Optional[dict[str, LLaMAModelConfig]] = None
+
+
+def init_llama_models(models: dict[str, LLaMAModelConfig]):
+    for name, model_config in models.items():
+        if not os.path.isfile(model_config["path"]):
+            print(f"LLaMA model {name} not found at {model_config['path']}.")
             sys.exit(1)
         if not name.startswith("llama"):
             print(f"LLaMA model names must start with `llama`, but got `{name}`.")
             sys.exit(1)
 
     global LLAMA_MODELS
-    LLAMA_MODELS = model_paths
+    LLAMA_MODELS = models
 
 
-def role_to_name(role: str) -> str:
+def role_to_name(role: str, model_config: LLaMAModelConfig) -> str:
     if role == "system" or role == "user":
-        return "### Human: "
+        return model_config["human_prompt"]
     elif role == "assistant":
-        return "### Assistant: "
+        return model_config["assistant_prompt"]
     else:
         raise ValueError(f"Unknown role: {role}")
 
 
-def make_prompt(messages: List[Message]) -> str:
+def make_prompt(messages: List[Message], model_config: LLaMAModelConfig) -> str:
     prompt = "\n".join(
-        [f"{role_to_name(message['role'])}{message['content']}" for message in messages]
+        [
+            f"{role_to_name(message['role'], model_config)}{message['content']}"
+            for message in messages
+        ]
     )
-    prompt += "### Assistant: "
+    prompt += f"\n{model_config['assistant_prompt']}"
     return prompt
 
 
-END_SEQ = "### Human:"
-
-
 class LLaMACompletionProvider(CompletionProvider):
     def complete(
         self, messages: List[Message], args: dict, stream: bool = False
     ) -> Iterator[str]:
         assert LLAMA_MODELS, "LLaMA models not initialized"
 
+        model_config = LLAMA_MODELS[args["model"]]
+
         with suppress_stderr():
             llm = Llama(
-                model_path=LLAMA_MODELS[args["model"]],
+                model_path=model_config["path"],
                 n_ctx=2048,
                 verbose=False,
                 use_mlock=True,
             )
-        prompt = make_prompt(messages)
+        prompt = make_prompt(messages, model_config)
+        print(prompt)
 
         extra_args = {}
         if "temperature" in args:
@@ -66,16 +75,16 @@ def complete(
         gen = llm.create_completion(
             prompt,
             max_tokens=1024,
-            stop=END_SEQ,
+            stop=model_config["human_prompt"],
             stream=stream,
             echo=False,
             **extra_args,
         )
         if stream:
-            for x in gen:
+            for x in cast(Iterator[CompletionChunk], gen):
                 yield x["choices"][0]["text"]
         else:
-            yield gen["choices"][0]["text"]
+            yield cast(Completion, gen)["choices"][0]["text"]
 
 
 # https://stackoverflow.com/a/50438156
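For reference, a minimal usage sketch of the new config shape accepted by init_llama_models. The import path, model name, and file path below are hypothetical; the prompt strings simply mirror the previously hard-coded "### Human: " / "### Assistant: " markers that this commit makes configurable.

# Hypothetical example; assumes the module lives at gptcli/llama.py.
from gptcli.llama import LLaMAModelConfig, init_llama_models

models: dict[str, LLaMAModelConfig] = {
    # Model names must start with "llama", per the check in init_llama_models.
    "llama-7b": {
        "path": "/models/llama-7b.bin",        # local model file (hypothetical path)
        "human_prompt": "### Human: ",         # used for system/user turns and as the stop sequence
        "assistant_prompt": "### Assistant: ", # used for assistant turns and as the prompt suffix
    }
}
init_llama_models(models)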