Skip to content

Commit 11b609f

Browse files
authored
[Feature] Add Model Management UI (#3)
* add pyinstaller spec, update top_k default value; update readme * add modelui; update documentation * Update README.md --------- Co-authored-by: tjtanaa <[email protected]>
1 parent e7ac69f commit 11b609f

15 files changed

+623
-23
lines changed

README.md

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# EmbeddedLLM
22

3-
Run local LLMs on iGPU, APU and CPU (AMD , Intel, and Qualcomm (Coming Soon))
3+
Run local LLMs on iGPU, APU and CPU (AMD, Intel, and Qualcomm (Coming Soon)).
44
Easiest way to launch OpenAI API Compatible Server on Windows, Linux and MacOS
55

66
| Support matrix | Supported now | Under Development | On the roadmap |
@@ -32,6 +32,10 @@ Easiest way to launch OpenAI API Compatible Server on Windows, Linux and MacOS
3232
| Phi3-mini-128k-instruct | 3.8B | 128k | [microsoft/Phi-3-mini-128k-instruct-onnx](https://huggingface.co/microsoft/Phi-3-mini-128k-instruct-onnx) |
3333
| Phi3-medium-4k-instruct | 17B | 4096 | [microsoft/Phi-3-medium-4k-instruct-onnx-directml](https://huggingface.co/microsoft/Phi-3-medium-4k-instruct-onnx-directml) |
3434
| Phi3-medium-128k-instruct | 17B | 128k | [microsoft/Phi-3-medium-128k-instruct-onnx-directml](https://huggingface.co/microsoft/Phi-3-medium-128k-instruct-onnx-directml) |
35+
| Openchat-3.6-8b | 8B | 8192 | [EmbeddedLLM/openchat-3.6-8b-20240522-onnx](https://huggingface.co/EmbeddedLLM/openchat-3.6-8b-20240522-onnx) |
36+
| Yi-1.5-6b-chat | 6B | 32k | [EmbeddedLLM/01-ai_Yi-1.5-6B-Chat-onnx](https://huggingface.co/EmbeddedLLM/01-ai_Yi-1.5-6B-Chat-onnx) |
37+
| Phi-3-vision-128k-instruct | | 128k | [EmbeddedLLM/Phi-3-vision-128k-instruct-onnx](https://huggingface.co/EmbeddedLLM/Phi-3-vision-128k-instruct-onnx/tree/main/onnx/cpu_and_mobile/cpu-int4-rtn-block-32-acc-level-4) |
38+
3539

3640
## Getting Started
3741

@@ -87,6 +91,18 @@ options:
8791

8892
1. `ellm_chatbot --port 7788 --host localhost --server_port <ellm_server_port> --server_host localhost`.
8993

94+
![Chatbot Web UI](asset/ellm_chatbot_vid.webp)
95+
96+
## Launch Model Management UI
97+
It is an interface that allows you to download and deploy an OpenAI API compatible server.
98+
The UI also shows the disk space required to download each model.
99+
100+
1. `ellm_modelui --port 6678`
101+
102+
![Model Management UI](asset/ellm_modelui.png)
103+
104+
105+
90106
## Acknowledgements
91107

92-
- Excellent open-source projects: [vLLM](https://github.com/vllm-project/vllm.git), [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai.git) and many others.
108+
- Excellent open-source projects: [vLLM](https://github.com/vllm-project/vllm.git), [onnxruntime-genai](https://github.com/microsoft/onnxruntime-genai.git) and many others.

asset/ellm_chatbot_vid.webp

13.1 MB
Binary file not shown.

asset/ellm_modelui.png

404 KB
Loading

ellm_api_server.spec

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# -*- mode: python ; coding: utf-8 -*-
2+
3+
from pathlib import Path
4+
from PyInstaller.utils.hooks import collect_all
5+
6+
binaries_list = []
7+
8+
print(Path("src/owl/entrypoints/api.py").resolve().as_posix())
9+
10+
datas_list = [
11+
(Path("src/embeddedllm/entrypoints/api_server.py").resolve().as_posix(), 'embeddedllm/entrypoints')
12+
]
13+
14+
hiddenimports_list = ['multipart']
15+
16+
def add_package(package_name):
17+
datas, binaries, hiddenimports = collect_all(package_name)
18+
datas_list.extend(datas)
19+
binaries_list.extend(binaries)
20+
hiddenimports_list.extend(hiddenimports)
21+
22+
add_package('onnxruntime')
23+
add_package('onnxruntime_genai')
24+
25+
print(binaries_list)
26+
with open("binary.txt", 'w') as f:
27+
f.write(str(binaries_list))
28+
29+
a = Analysis(
30+
['src\\embeddedllm\\entrypoints\\api_server.py'],
31+
pathex=[],
32+
binaries=binaries_list,
33+
datas=datas_list,
34+
hiddenimports=hiddenimports_list,
35+
hookspath=[],
36+
hooksconfig={},
37+
runtime_hooks=[],
38+
excludes=[],
39+
noarchive=False,
40+
optimize=0,
41+
)
42+
pyz = PYZ(a.pure)
43+
44+
exe = EXE(
45+
pyz,
46+
a.scripts,
47+
[],
48+
exclude_binaries=True,
49+
name='ellm_api_server',
50+
debug=False,
51+
bootloader_ignore_signals=False,
52+
strip=False,
53+
upx=True,
54+
console=True,
55+
disable_windowed_traceback=False,
56+
argv_emulation=False,
57+
target_arch=None,
58+
codesign_identity=None,
59+
entitlements_file=None,
60+
)
61+
coll = COLLECT(
62+
exe,
63+
a.binaries,
64+
a.datas,
65+
strip=False,
66+
upx=True,
67+
upx_exclude=[],
68+
name='ellm_api_server',
69+
)

scripts/python/httpx_client_stream.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
import asyncio
2-
3-
import httpx
42
import json
53

6-
def parse_stream(stream:str):
4+
import httpx
75

8-
stream = stream.replace('data: ', '')
96

7+
def parse_stream(stream: str):
8+
stream = stream.replace("data: ", "")
109
response_obj = json.loads(stream)
1110
# print(response_obj)
1211

1312
return response_obj
1413

14+
1515
async def stream_chat_completion(url: str, payload: dict):
1616
async with httpx.AsyncClient() as client:
1717
async with client.stream("POST", url, json=payload) as response:
@@ -22,9 +22,9 @@ async def stream_chat_completion(url: str, payload: dict):
2222
if "[DONE]" in decodes_stream:
2323
continue
2424
resp = parse_stream(decodes_stream)
25-
if resp["choices"][0]["delta"].get('content', None):
26-
print(resp["choices"][0]["delta"]["content"], end='', flush=True)
27-
25+
if resp["choices"][0]["delta"].get("content", None):
26+
print(resp["choices"][0]["delta"]["content"], end="", flush=True)
27+
2828
# time.sleep(1)
2929
else:
3030
print(f"Error: {response.status_code}")

scripts/python/httpx_client_vision.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,10 @@
1-
import httpx
2-
import os
31
import base64
42
import mimetypes
3+
import os
4+
5+
import httpx
56

6-
from embeddedllm.protocol import (
7-
CustomChatCompletionMessageParam,
8-
)
7+
from embeddedllm.protocol import CustomChatCompletionMessageParam
98

109

1110
def chat_completion(url: str, payload: dict):

scripts/python/httpx_client_vision_stream.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import asyncio
2-
import os
32
import base64
43
import mimetypes
4+
import os
5+
56
import httpx
67

78

scripts/python/litellm_vision_client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
1-
import litellm
21
import base64
32
import mimetypes
43
import os
54

5+
import litellm
6+
67
current_file_path = os.path.abspath(__file__)
78
IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
89

scripts/python/openai_vision_client.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1-
from openai import AsyncOpenAI
21
import asyncio
32
import base64
43
import mimetypes
54
import os
65

6+
from openai import AsyncOpenAI
7+
78
current_file_path = os.path.abspath(__file__)
89
IMAGE_PATH = os.path.join(os.path.dirname(current_file_path), "..", "images", "catdog.png")
910

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
import io
22
import os
3+
import platform
34
import re
45
from typing import List
5-
import platform
66

77
from setuptools import find_packages, setup
88

@@ -140,6 +140,7 @@ def get_ellm_version() -> str:
140140
"console_scripts": [
141141
"ellm_server=embeddedllm.entrypoints.api_server:main",
142142
"ellm_chatbot=embeddedllm.entrypoints.webui:main",
143+
"ellm_modelui=embeddedllm.entrypoints.modelui:main",
143144
],
144145
},
145146
)

0 commit comments

Comments
 (0)