Refactor chat server
Adds command-line options, including selection of the chat model. Adds logging, including the tokens-per-second generation rate. Encapsulates the code in a class with more specifically named methods. Adds the model name to the web page.
guynich committed Feb 28, 2025
1 parent 8b15d17 commit 85f937d
Showing 3 changed files with 172 additions and 91 deletions.
23 changes: 19 additions & 4 deletions README.md
@@ -22,6 +22,7 @@ installation.
- [Installation](#installation)
- [Run](#run)
- [Web server](#web-server)
- [Other models](#other-models)
- [References](#references)
- [Next steps](#next-steps)

@@ -251,6 +252,10 @@ rm -f .ollama/history
This section contains several examples showing the usage of a locally running
DeepSeek-R1 model.

> [!TIP]
> Users looking for desktop and cloud applications to run models might
> take a look at [AnythingLLM](https://anythingllm.com).

## Temperature (experimental)

DeepSeek documentation recommends changing
@@ -403,15 +408,25 @@ source ./venv_ollama/bin/activate
python3 deepseek_opi5plus/browser/server.py
```

Navigate to `http://127.0.0.1:5000` in a browser for the chat session (or the
host IP address when using the `--network` option). Tested with the Chromium
browser on Ubuntu 22.04 on OrangePi 5, and with Safari on macOS.

<img src="/images/chat_browser.png" alt="Web browser interface"/>

The browser web page is updated after the model has finished generating text.
Context history is preserved during the session.
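
The context is an accumulating message list: each user turn and each model
reply are appended, and the full list is sent with the next request. A minimal
sketch of the pattern (hypothetical standalone snippet, mirroring what
`browser/server.py` does in the diff below):

```python
from ollama import chat as ollama_chat

messages = []  # grows for the lifetime of the session

def ask(user_input: str) -> str:
    """Send one turn, keeping the full conversation as context."""
    messages.append({"role": "user", "content": user_input})
    reply = ollama_chat(model="deepseek-r1:1.5b", messages=messages)
    content = reply["message"]["content"]
    messages.append({"role": "assistant", "content": content})
    return content
```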

### Other models

You can select a different Ollama-supported model with the `--model` option.
First make the model available by pulling it, then run the server.
```bash
ollama pull deepseek-r1:7b
python3 deepseek_opi5plus/browser/server.py --model "deepseek-r1:7b"
```
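
The same steps can be scripted with the `ollama` Python client used elsewhere
in this repository (a sketch; assumes the Ollama service is running and the
`ollama` package is installed in the active virtual environment):

```python
import ollama

MODEL = "deepseek-r1:7b"  # any Ollama-supported model tag

ollama.pull(MODEL)  # downloads the model if it is not already cached
reply = ollama.chat(model=MODEL, messages=[{"role": "user", "content": "Hello"}])
print(reply["message"]["content"])  # quick smoke test before starting the server
```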

# References

* Ollama.
216 changes: 141 additions & 75 deletions browser/server.py
@@ -1,95 +1,161 @@
"""A simple web-based chat application using Flask and Ollama.
This application allows users to interact with a locally running DeepSeek model
"""A web-based chat application using Flask and Ollama.
This application allows users to interact with a locally running DeepSeek model.
"""
import argparse
import logging
import re

import markdown
from flask import Flask, render_template, request
from ollama import chat as ollama_chat

DEFAULT_MODEL = "deepseek-r1:1.5b"
DEFAULT_PORT = 5000


class ChatServer:
def __init__(self):
self.parser = self._init_argument_parser()
self.args = self.parser.parse_args()
self.app = Flask(__name__)
self.messages = []
self._setup_logging()
self._setup_routes()

@staticmethod
def _init_argument_parser():
parser = argparse.ArgumentParser(
description="Run web server for DeepSeek-R1 chat application.")
parser.add_argument(
"--model",
type=str,
default=DEFAULT_MODEL,
help=f"Specify Ollama supported LLM model to use (default: {DEFAULT_MODEL})"
)
parser.add_argument(
"--network",
action="store_true",
help="Enable any device on the network to connect"
)
parser.add_argument(
"--port",
type=int,
default=DEFAULT_PORT,
help=f"Specify port to run the server on (default: {DEFAULT_PORT})"
)
parser.add_argument(
"--verbose",
action="store_true",
help="Enable verbose logging output"
)
return parser

def _setup_logging(self):
"""Configure logging for the chat server."""
logging.basicConfig(
level=logging.DEBUG if self.args.verbose else logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
self.logger = logging.getLogger(__name__)

def _setup_routes(self):
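        # app.route(...) returns a decorator; calling it directly with the
        # bound method registers that method as the view function.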
self.app.route("/")(self._handle_home)
self.app.route("/chat", methods=["POST"])(self._handle_chat_request)

def _handle_chat_request(self):
"""Handle chat API requests."""
submitted_input = request.form.get("user_input", "")
chat_response = self.process_chat_response(submitted_input)
return {"response": chat_response, "last_question": submitted_input}

def _handle_home(self) -> str:
"""Handle main page requests."""
if request.method != "POST":
return render_template("index.html",
user_input="",
chat_response="",
model=self.args.model)

submitted_input = request.form.get("user_input", "")
chat_response = self.process_chat_response(submitted_input)
return render_template(
"index.html",
last_question=submitted_input,
chat_response=chat_response,
model=self.args.model
)

def _process_content(self, content: str) -> str:
"""Process the content to handle <think> blocks and markdown in HTML."""
content_with_placeholders, think_blocks = self._protect_think_blocks(content)

# Convert markdown to HTML
processed_content = markdown.markdown(
content_with_placeholders, extensions=["fenced_code"]
)

return self._restore_think_blocks(processed_content, think_blocks)

def _protect_think_blocks(self, content: str) -> tuple[str, list]:
"""Temporarily replace <think> blocks with placeholders."""
think_blocks = []

def save_think_block(match):
think_blocks.append(match.group(0))
return f"THINK_BLOCK_{len(think_blocks) - 1}"

# Save think blocks and replace with placeholders
content_with_placeholders = re.sub(
r"<think>[\s\S]*?</think>", save_think_block, content
)
return content_with_placeholders, think_blocks

def _restore_think_blocks(self, content: str, think_blocks: list) -> str:
"""Restore <think> blocks from placeholders."""
for i, block in enumerate(think_blocks):
content = content.replace(f"<p>THINK_BLOCK_{i}</p>", block)
return content

def process_chat_response(self, user_input: str) -> str:
"""Get response from locally running Ollama model."""
try:
self.messages.append({"role": "user", "content": user_input})
content = ""
for part in ollama_chat(
model=self.args.model,
messages=self.messages,
options={
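                    # Fixed seed for reproducible output; temperature 0.6
                    # follows the DeepSeek-R1 guidance noted in the README.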
"seed": 42,
"temperature": 0.6,
},
stream=True,
):
chunk = part["message"]["content"]
content += chunk
                if self.args.verbose:
                    # Plain print (not logging) so streamed chunks render incrementally.
                    print(chunk, end="", flush=True)
            if self.args.verbose:
                print()  # terminate the streamed line before the summary log
self.messages.append({"role": "assistant", "content": content})

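            # The final streamed chunk carries run statistics: total_duration
            # is reported in nanoseconds and eval_count is the number of
            # generated tokens, so tokens/second = eval_count / duration.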
duration = part.total_duration / 1e9
            self.logger.info(
                f"Response generated in {duration:.3f}s "
                f"({part.eval_count / duration:.1f} tok/s)"
            )

            # Render <think> blocks and markdown into HTML for the page.
            return self._process_content(content)

except Exception as e:
self.logger.error(f"Error getting response from Ollama: {e}")
return f"Sorry, something went wrong.\n{str(e)}"

def run_server(self):
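        # Binding to 0.0.0.0 exposes the server to the local network;
        # the default 127.0.0.1 keeps it loopback-only.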
config = {
"debug": True,
"port": self.args.port,
"host": "0.0.0.0" if self.args.network else "127.0.0.1"
}
self.app.run(**config)


if __name__ == "__main__":
server = ChatServer()
server.run_server()
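
Once the server is running, the `/chat` endpoint can be exercised directly.
A quick smoke test (a sketch; assumes the server is on the default port and
the `requests` package is installed):

```python
import requests

resp = requests.post(
    "http://127.0.0.1:5000/chat",
    data={"user_input": "Why is the sky blue?"},
)
payload = resp.json()
print(payload["last_question"])  # echoes the submitted question
print(payload["response"])       # the model reply rendered as HTML
```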
24 changes: 12 additions & 12 deletions browser/templates/index.html
@@ -3,16 +3,16 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Local chat using Ollama</title>
<style>
body { font-family: Arial, sans-serif; background-color: #f0f0f0; margin: 0; padding: 0;}
.container { width: 50%; margin: 0 auto; padding: 20px; background-color: #fff; border-radius: 8px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); }
h1 { text-align: center; }
.chat-box { margin-top: 0; border-top: 2px solid #ddd; padding-top: 20px;}
.chat-box .user-message, .chat-box .chat-message { padding: 10px; margin: 10px 0; border-radius: 5px;}
.user-message { background-color: #e1f5fe; text-align: right;}
.chat-message { background-color: #f1f1f1; white-space: pre-wrap; padding: 10px; margin: 10px 0; border-radius: 5px; }
.chat-message think {
color: #666;
font-style: italic;
border-left: 3px solid #57b368;
@@ -24,22 +24,22 @@
word-wrap: break-word; /* Break long words if needed */
overflow-wrap: break-word; /* Modern browsers */
}
.chat-message pre {
background-color: #f8f9fa;
padding: 10px;
border-radius: 4px;
overflow-x: auto;
}
.chat-message code {
font-family: 'Courier New', Courier, monospace;
background-color: #f8f9fa;
padding: 2px 4px;
border-radius: 3px;
}
.chat-message p {
margin: 0 0 1em 0;
}
.chat-message ul, .chat-message ol {
margin-left: 20px;
}
form { margin-top: 20px; margin-bottom: 20px; }
@@ -73,11 +73,11 @@
</head>
<body>
<div class="container">
<h1>Chat with model "{{ model }}"</h1>
<form id="chat-form" method="POST">
<input type="text"
name="user_input"
placeholder="Ask me something that needs reasoning..."
placeholder="Ask me something ..."
value=""
required>
<button type="submit">Send</button>
@@ -88,7 +88,7 @@ <h1>Reasoning with DeepSeek-R1</h1>
<div class="chat-box">
{% if last_question %}
<div class="user-message">{{ last_question }}</div>
<div class="bot-message">{{ bot_response|safe }}</div>
<div class="chat-message">{{ chat_response|safe }}</div>
{% endif %}
</div>
</div>
@@ -115,7 +115,7 @@ <h1>Reasoning with DeepSeek-R1</h1>
// Add new messages to chat box
chatBox.innerHTML = `
<div class="user-message">${data.last_question}</div>
<div class="bot-message">${data.response}</div>
<div class="chat-message">${data.response}</div>
${chatBox.innerHTML}
`;

