Refactor chat server
Adds command-line options, including selection of the chat model. Adds logging, including the tokens-per-second generation rate. Encapsulates the code in a class with more specifically named methods. Adds the model name to the web page.
guynich committed Feb 28, 2025
1 parent 8b15d17 commit 85f937d
Showing 3 changed files with 172 additions and 91 deletions.
23 changes: 19 additions & 4 deletions README.md
@@ -22,6 +22,7 @@ installation.
- [Installation](#installation)
- [Run](#run)
- [Web server](#web-server)
- [Other models](#other-models)
- [References](#references)
- [Next steps](#next-steps)

@@ -251,6 +252,10 @@ rm -f .ollama/history
This section contains several examples showing the usage of a locally running
DeepSeek-R1 model.

> [!TIP]
> Users looking for desktop and cloud applications to run models might
> take a look at [AnythingLLM](https://anythingllm.com).

## Temperature (experimental)

DeepSeek documentation recommends changing
@@ -403,15 +408,25 @@ source ./venv_ollama/bin/activate
python3 deepseek_opi5plus/browser/server.py
```

Navigate to `http://127.0.0.1:5000` in a browser for the chat session (or the
host IP address when using the `--network` option). Tested with the Chromium
browser on Ubuntu 22.04 on OrangePi 5, and with Safari on macOS.

<img src="/images/chat_browser.png" alt="Web browser interface"/>

The browser web page is updated after the model has finished generating text.
Context history is preserved during the session.
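
The context is an accumulating message list: each user turn and each model
reply are appended, and the full list is sent with the next request. A minimal
sketch of the pattern (hypothetical standalone snippet, mirroring what
`browser/server.py` does in the diff below):

```python
from ollama import chat as ollama_chat

messages = []  # grows for the lifetime of the session

def ask(user_input: str) -> str:
    """Send one turn, keeping the full conversation as context."""
    messages.append({"role": "user", "content": user_input})
    reply = ollama_chat(model="deepseek-r1:1.5b", messages=messages)
    content = reply["message"]["content"]
    messages.append({"role": "assistant", "content": content})
    return content
```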

### Other models

You can select a different Ollama-supported model with the `--model` option.
First make the model available by pulling it, then run the server.
```bash
ollama pull deepseek-r1:7b
python3 deepseek_opi5plus/browser/server.py --model "deepseek-r1:7b"
```
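
The same steps can be scripted with the `ollama` Python client used elsewhere
in this repository (a sketch; assumes the Ollama service is running and the
`ollama` package is installed in the active virtual environment):

```python
import ollama

MODEL = "deepseek-r1:7b"  # any Ollama-supported model tag

ollama.pull(MODEL)  # downloads the model if it is not already cached
reply = ollama.chat(model=MODEL, messages=[{"role": "user", "content": "Hello"}])
print(reply["message"]["content"])  # quick smoke test before starting the server
```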

# References

* Ollama.
216 changes: 141 additions & 75 deletions browser/server.py
@@ -1,95 +1,161 @@
"""A simple web-based chat application using Flask and Ollama.
This application allows users to interact with a locally running DeepSeek model
"""A web-based chat application using Flask and Ollama.
This application allows users to interact with a locally running DeepSeek model.
"""
import argparse
import logging
import re

import markdown
from flask import Flask, render_template, request
from ollama import chat as ollama_chat

DEFAULT_MODEL = "deepseek-r1:1.5b"
DEFAULT_PORT = 5000


class ChatServer:
def __init__(self):
self.parser = self._init_argument_parser()
self.args = self.parser.parse_args()
self.app = Flask(__name__)
self.messages = []
self._setup_logging()
self._setup_routes()

@staticmethod
def _init_argument_parser():
parser = argparse.ArgumentParser(
description="Run web server for DeepSeek-R1 chat application.")
parser.add_argument(
"--model",
type=str,
default=DEFAULT_MODEL,
help=f"Specify Ollama supported LLM model to use (default: {DEFAULT_MODEL})"
)
parser.add_argument(
"--network",
action="store_true",
help="Enable any device on the network to connect"
)
parser.add_argument(
"--port",
type=int,
default=DEFAULT_PORT,
help=f"Specify port to run the server on (default: {DEFAULT_PORT})"
)
parser.add_argument(
"--verbose",
action="store_true",
help="Enable verbose logging output"
)
return parser

def _setup_logging(self):
"""Configure logging for the chat server."""
logging.basicConfig(
level=logging.DEBUG if self.args.verbose else logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
self.logger = logging.getLogger(__name__)

def _setup_routes(self):
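        # app.route(...) returns a decorator; calling it directly with the
        # bound method registers that method as the view function.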
self.app.route("/")(self._handle_home)
self.app.route("/chat", methods=["POST"])(self._handle_chat_request)

def _handle_chat_request(self):
"""Handle chat API requests."""
submitted_input = request.form.get("user_input", "")
chat_response = self.process_chat_response(submitted_input)
return {"response": chat_response, "last_question": submitted_input}

def _handle_home(self) -> str:
"""Handle main page requests."""
if request.method != "POST":
return render_template("index.html",
user_input="",
chat_response="",
model=self.args.model)

submitted_input = request.form.get("user_input", "")
chat_response = self.process_chat_response(submitted_input)
return render_template(
"index.html",
last_question=submitted_input,
chat_response=chat_response,
model=self.args.model
)

def _process_content(self, content: str) -> str:
"""Process the content to handle <think> blocks and markdown in HTML."""
content_with_placeholders, think_blocks = self._protect_think_blocks(content)

# Convert markdown to HTML
processed_content = markdown.markdown(
content_with_placeholders, extensions=["fenced_code"]
)

return self._restore_think_blocks(processed_content, think_blocks)

def _protect_think_blocks(self, content: str) -> tuple[str, list]:
"""Temporarily replace <think> blocks with placeholders."""
think_blocks = []

def save_think_block(match):
think_blocks.append(match.group(0))
return f"THINK_BLOCK_{len(think_blocks) - 1}"

# Save think blocks and replace with placeholders
content_with_placeholders = re.sub(
r"<think>[\s\S]*?</think>", save_think_block, content
)
return content_with_placeholders, think_blocks

def _restore_think_blocks(self, content: str, think_blocks: list) -> str:
"""Restore <think> blocks from placeholders."""
for i, block in enumerate(think_blocks):
content = content.replace(f"<p>THINK_BLOCK_{i}</p>", block)
return content

def process_chat_response(self, user_input: str) -> str:
"""Get response from locally running Ollama model."""
try:
self.messages.append({"role": "user", "content": user_input})
content = ""
for part in ollama_chat(
model=self.args.model,
messages=self.messages,
options={
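                    # Fixed seed for reproducible output; temperature 0.6
                    # follows the DeepSeek-R1 guidance noted in the README.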
"seed": 42,
"temperature": 0.6,
},
stream=True,
):
chunk = part["message"]["content"]
content += chunk
                if self.args.verbose:
                    # Plain print (not logging) so streamed chunks render incrementally.
                    print(chunk, end="", flush=True)
            if self.args.verbose:
                print()  # terminate the streamed line before the summary log
self.messages.append({"role": "assistant", "content": content})

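            # The final streamed chunk carries run statistics: total_duration
            # is reported in nanoseconds and eval_count is the number of
            # generated tokens, so tokens/second = eval_count / duration.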
duration = part.total_duration / 1e9
            self.logger.info(
                f"Response generated in {duration:.3f}s "
                f"({part.eval_count / duration:.1f} tok/s)"
            )

            # Render <think> blocks and markdown into HTML for the page.
            return self._process_content(content)

except Exception as e:
self.logger.error(f"Error getting response from Ollama: {e}")
return f"Sorry, something went wrong.\n{str(e)}"

def run_server(self):
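        # Binding to 0.0.0.0 exposes the server to the local network;
        # the default 127.0.0.1 keeps it loopback-only.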
config = {
"debug": True,
"port": self.args.port,
"host": "0.0.0.0" if self.args.network else "127.0.0.1"
}
self.app.run(**config)


if __name__ == "__main__":
server = ChatServer()
server.run_server()
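
Once the server is running, the `/chat` endpoint can be exercised directly.
A quick smoke test (a sketch; assumes the server is on the default port and
the `requests` package is installed):

```python
import requests

resp = requests.post(
    "http://127.0.0.1:5000/chat",
    data={"user_input": "Why is the sky blue?"},
)
payload = resp.json()
print(payload["last_question"])  # echoes the submitted question
print(payload["response"])       # the model reply rendered as HTML
```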
24 changes: 12 additions & 12 deletions browser/templates/index.html
@@ -3,16 +3,16 @@
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Local chat using Ollama</title>
<style>
body { font-family: Arial, sans-serif; background-color: #f0f0f0; margin: 0; padding: 0;}
.container { width: 50%; margin: 0 auto; padding: 20px; background-color: #fff; border-radius: 8px; box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); }
h1 { text-align: center; }
.chat-box { margin-top: 0; border-top: 2px solid #ddd; padding-top: 20px;}
.chat-box .user-message, .chat-box .chat-message { padding: 10px; margin: 10px 0; border-radius: 5px;}
.user-message { background-color: #e1f5fe; text-align: right;}
.chat-message { background-color: #f1f1f1; white-space: pre-wrap; padding: 10px; margin: 10px 0; border-radius: 5px; }
.chat-message think {
color: #666;
font-style: italic;
border-left: 3px solid #57b368;
@@ -24,22 +24,22 @@
word-wrap: break-word; /* Break long words if needed */
overflow-wrap: break-word; /* Modern browsers */
}
.chat-message pre {
background-color: #f8f9fa;
padding: 10px;
border-radius: 4px;
overflow-x: auto;
}
.chat-message code {
font-family: 'Courier New', Courier, monospace;
background-color: #f8f9fa;
padding: 2px 4px;
border-radius: 3px;
}
.chat-message p {
margin: 0 0 1em 0;
}
.chat-message ul, .chat-message ol {
margin-left: 20px;
}
form { margin-top: 20px; margin-bottom: 20px; }
@@ -73,11 +73,11 @@
</head>
<body>
<div class="container">
<h1>Chat with model "{{ model }}"</h1>
<form id="chat-form" method="POST">
<input type="text"
name="user_input"
placeholder="Ask me something that needs reasoning..."
placeholder="Ask me something ..."
value=""
required>
<button type="submit">Send</button>
@@ -88,7 +88,7 @@ <h1>Reasoning with DeepSeek-R1</h1>
<div class="chat-box">
{% if last_question %}
<div class="user-message">{{ last_question }}</div>
<div class="bot-message">{{ bot_response|safe }}</div>
<div class="chat-message">{{ chat_response|safe }}</div>
{% endif %}
</div>
</div>
@@ -115,7 +115,7 @@ <h1>Reasoning with DeepSeek-R1</h1>
// Add new messages to chat box
chatBox.innerHTML = `
<div class="user-message">${data.last_question}</div>
<div class="bot-message">${data.response}</div>
<div class="chat-message">${data.response}</div>
${chatBox.innerHTML}
`;

