Commit
Adds command-line options, including specification of the chat model. Adds logging, including the tokens-per-second rate. Encapsulates the code in a class, with methods renamed for specificity. Adds the model name string to the web page.
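The new flags can be combined; a hypothetical invocation (the script's filename is not shown on this page, so chat_server.py is a placeholder):

    python chat_server.py --model deepseek-r1:1.5b --port 5000 --network --verbose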
Showing 3 changed files with 172 additions and 91 deletions.
@@ -1,95 +1,161 @@
-"""A simple web-based chat application using Flask and Ollama.
-This application allows users to interact with a locally running DeepSeek model
+"""A web-based chat application using Flask and Ollama.
+This application allows users to interact with a locally running DeepSeek model.
 """
+import argparse
+import logging
+import re

 import markdown
 from flask import Flask, render_template, request
 from ollama import chat as ollama_chat

-VERBOSE = False
-
-app = Flask(__name__)
-
-messages = []
-
-
-def get_chat_response(user_input: str) -> str:
-    """Get response from locally running Ollama model."""
-    try:
-        messages.append({"role": "user", "content": user_input})
-        content = ""
-        for part in ollama_chat(
-            model="deepseek-r1:1.5b",
-            messages=messages,
-            options={
-                "seed": 42,
-                "temperature": 0.6,
-            },
-            stream=True,
-        ):
-            chunk = part["message"]["content"]
-            content += chunk
-            if VERBOSE:
-                print(chunk, end="", flush=True)
-        print()
-        messages.append({"role": "assistant", "content": content})
-
-        # Process the content to handle <think> blocks and markdown in HTML.
-
-        # First, protect <think> blocks by replacing them temporarily
+DEFAULT_MODEL = "deepseek-r1:1.5b"
+DEFAULT_PORT = 5000
+
+
+class ChatServer:
+    def __init__(self):
+        self.parser = self._init_argument_parser()
+        self.args = self.parser.parse_args()
+        self.app = Flask(__name__)
+        self.messages = []
+        self._setup_logging()
+        self._setup_routes()
+
+    @staticmethod
+    def _init_argument_parser():
+        parser = argparse.ArgumentParser(
+            description="Run web server for DeepSeek-R1 chat application.")
+        parser.add_argument(
+            "--model",
+            type=str,
+            default=DEFAULT_MODEL,
+            help=f"Specify Ollama supported LLM model to use (default: {DEFAULT_MODEL})"
+        )
+        parser.add_argument(
+            "--network",
+            action="store_true",
+            help="Enable any device on the network to connect"
+        )
+        parser.add_argument(
+            "--port",
+            type=int,
+            default=DEFAULT_PORT,
+            help=f"Specify port to run the server on (default: {DEFAULT_PORT})"
+        )
+        parser.add_argument(
+            "--verbose",
+            action="store_true",
+            help="Enable verbose logging output"
+        )
+        return parser
+
+    def _setup_logging(self):
+        """Configure logging for the chat server."""
+        logging.basicConfig(
+            level=logging.DEBUG if self.args.verbose else logging.INFO,
+            format='%(asctime)s - %(levelname)s - %(message)s'
+        )
+        self.logger = logging.getLogger(__name__)
+
+    def _setup_routes(self):
+        self.app.route("/")(self._handle_home)
+        self.app.route("/chat", methods=["POST"])(self._handle_chat_request)
+
+    def _handle_chat_request(self):
+        """Handle chat API requests."""
+        submitted_input = request.form.get("user_input", "")
+        chat_response = self.process_chat_response(submitted_input)
+        return {"response": chat_response, "last_question": submitted_input}
+
+    def _handle_home(self) -> str:
+        """Handle main page requests."""
+        if request.method != "POST":
+            return render_template("index.html",
+                                   user_input="",
+                                   chat_response="",
+                                   model=self.args.model)
+
+        submitted_input = request.form.get("user_input", "")
+        chat_response = self.process_chat_response(submitted_input)
+        return render_template(
+            "index.html",
+            last_question=submitted_input,
+            chat_response=chat_response,
+            model=self.args.model
+        )
+
+    def _process_content(self, content: str) -> str:
+        """Process the content to handle <think> blocks and markdown in HTML."""
+        content_with_placeholders, think_blocks = self._protect_think_blocks(content)
+
+        # Convert markdown to HTML
+        processed_content = markdown.markdown(
+            content_with_placeholders, extensions=["fenced_code"]
+        )
+
+        return self._restore_think_blocks(processed_content, think_blocks)
+
+    def _protect_think_blocks(self, content: str) -> tuple[str, list]:
+        """Temporarily replace <think> blocks with placeholders."""
         think_blocks = []
-        import re

         def save_think_block(match):
             think_blocks.append(match.group(0))
             return f"THINK_BLOCK_{len(think_blocks) - 1}"

         # Save think blocks and replace with placeholders
         content_with_placeholders = re.sub(
             r"<think>[\s\S]*?</think>", save_think_block, content
         )
+        return content_with_placeholders, think_blocks

-        # Convert markdown to HTML
-        processed_content = markdown.markdown(
-            content_with_placeholders, extensions=["fenced_code"]
-        )
-
-        # Restore think blocks
+    def _restore_think_blocks(self, content: str, think_blocks: list) -> str:
+        """Restore <think> blocks from placeholders."""
         for i, block in enumerate(think_blocks):
-            processed_content = processed_content.replace(
-                f"<p>THINK_BLOCK_{i}</p>", block
-            )
-        if VERBOSE:
-            print("Processed content:")
-            print(processed_content)
-        return processed_content
-    except Exception as e:
-        print(f"Error getting response from Ollama: {e}")
-        return "Sorry, please try again."
-
-
-@app.route("/", methods=["GET", "POST"])
-def index() -> str:
-    """Handle main page requests."""
-    if request.method != "POST":
-        return render_template("index.html", user_input="", bot_response="")
-
-    submitted_input = request.form.get("user_input", "")
-    chat_response = get_chat_response(submitted_input)
-    return render_template(
-        "index.html",
-        last_question=submitted_input,
-        bot_response=chat_response,
-    )
-
-
-@app.route("/chat", methods=["POST"])
-def chat():
-    """Handle chat API requests."""
-    submitted_input = request.form.get("user_input", "")
-    chat_response = get_chat_response(submitted_input)
-    return {"response": chat_response, "last_question": submitted_input}
+            content = content.replace(f"<p>THINK_BLOCK_{i}</p>", block)
+        return content
+
+    def process_chat_response(self, user_input: str) -> str:
+        """Get response from locally running Ollama model."""
+        try:
+            self.messages.append({"role": "user", "content": user_input})
+            content = ""
+            for part in ollama_chat(
+                model=self.args.model,
+                messages=self.messages,
+                options={
+                    "seed": 42,
+                    "temperature": 0.6,
+                },
+                stream=True,
+            ):
+                chunk = part["message"]["content"]
+                content += chunk
+                if self.args.verbose:
+                    # This is a non-logging print to get streaming output.
+                    print(chunk, end="", flush=True)
+            self.messages.append({"role": "assistant", "content": content})
+
+            duration = part.total_duration / 1e9
+            self.logger.info(f"Response generated in {duration:.3f}s ({(part.eval_count / duration):.1f} tok/s)")
+
+            # Process the response content
+            processed_content = self._process_content(content)
+            return processed_content
+
+        except Exception as e:
+            self.logger.error(f"Error getting response from Ollama: {e}")
+            return f"Sorry, something went wrong.\n{str(e)}"
+
+    def run_server(self):
+        config = {
+            "debug": True,
+            "port": self.args.port,
+            "host": "0.0.0.0" if self.args.network else "127.0.0.1"
+        }
+        self.app.run(**config)


 if __name__ == "__main__":
-    app.run(host="0.0.0.0", port=5000, debug=True)
+    server = ChatServer()
+    server.run_server()
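The placeholder round trip in _protect_think_blocks and _restore_think_blocks can be exercised outside the class. A minimal sketch of the same technique (the sample text string is invented for illustration; note the blank line after </think>, which makes markdown wrap the placeholder in a paragraph of its own so the restore step can find it):

import re

import markdown

text = "<think>First recall the formula...</think>\n\n**Answer:** 42"

# Stash <think> blocks so markdown conversion cannot alter them.
think_blocks = []

def save_think_block(match):
    think_blocks.append(match.group(0))
    return f"THINK_BLOCK_{len(think_blocks) - 1}"

protected = re.sub(r"<think>[\s\S]*?</think>", save_think_block, text)

# Convert the remaining markdown to HTML.
html = markdown.markdown(protected, extensions=["fenced_code"])

# Swap each placeholder, now its own <p> element, back for the original block.
for i, block in enumerate(think_blocks):
    html = html.replace(f"<p>THINK_BLOCK_{i}</p>", block)

print(html)  # <think>...</think> survives; **Answer:** becomes <strong>Answer:</strong>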
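The tokens-per-second log line added in process_chat_response reads generation metadata from the last streamed chunk: the Ollama Python client reports total_duration in nanoseconds and eval_count as the number of generated tokens on the final part. A standalone sketch of the same measurement, assuming a local Ollama server with the model already pulled:

import logging

from ollama import chat as ollama_chat

logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

part = None
for part in ollama_chat(
    model="deepseek-r1:1.5b",
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=True,
):
    # Stream tokens to the console as they arrive.
    print(part["message"]["content"], end="", flush=True)
print()

# Only the final chunk carries the timing fields; earlier chunks leave them unset.
if part is not None and part.total_duration:
    duration = part.total_duration / 1e9  # nanoseconds -> seconds
    logger.info(f"Response generated in {duration:.3f}s ({part.eval_count / duration:.1f} tok/s)")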