
Commit 4075677

Add OpenAI backend to the CI test (#869)
1 parent 9e8d2c7 commit 4075677

16 files changed: +30 −38 lines changed

.github/workflows/pr-e2e-test.yml (+7 −1)

@@ -32,7 +32,13 @@ jobs:
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
           pip install --upgrade transformers

-      - name: Launch server and run benchmark
+      - name: Test OpenAI Backend
+        run: |
+          export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
+          cd sglang/test/lang
+          python3 test_openai_backend.py
+
+      - name: Benchmark Serving
         run: |
           python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
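
The new CI step runs test/lang/test_openai_backend.py with the API key in the environment. A minimal sketch of the kind of program such a test exercises, based on the sglang API used elsewhere in this commit (the exact test contents are not shown here; the prompt and names are illustrative):

    import sglang as sgl

    @sgl.function
    def few_shot_qa(s, question):
        s += "Q: " + question + "\n"
        s += "A:" + sgl.gen("answer", stop="\n", max_tokens=32)

    # Assumes OPENAI_API_KEY is set, as in the CI step above.
    sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
    state = few_shot_qa.run(question="What is the capital of France?")
    print(state["answer"])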

.github/workflows/release-docker.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: publish docker
+name: Release Docker
 on:
   push:
     branches:

.github/workflows/release-fake-tag.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: fake tag
+name: Release Fake Tag
 on:
   push:
     branches:

.github/workflows/release-github.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: release tag
+name: Release GitHub
 on:
   workflow_dispatch:
 jobs:

.github/workflows/release-pypi.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: publish to pypi
+name: Release PyPI
 on:
   push:
     branches:

assets/llama_7b.jpg (−231 KB)

Binary file not shown.

assets/mixtral_8x7b.jpg (−157 KB)

Binary file not shown.

docs/en/benchmark_results.md (−22)

This file was deleted.

docs/en/model_support.md (+1 −1)

@@ -11,6 +11,6 @@ To port a model from vLLM to SGLang, you can compare these two files [SGLang LLa
 - Change `forward()` functions, and add `input_metadata`.
 - Add `EntryClass` at the end.
 - Test correctness by comparing the final logits and outputs of the two following commands:
-  - `python3 playground/reference_hf.py --model [new model]`
+  - `python3 scripts/playground/reference_hf.py --model [new model]`
   - `python3 -m sglang.bench_latency --model [new model] --correct --output-len 16 --trust-remote-code`
 - Update [Supported Models](https://github.com/sgl-project/sglang/tree/main?tab=readme-ov-file#supported-models) at [README](../README.md).
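
The correctness check above compares logits from a plain Hugging Face forward pass against sglang.bench_latency. A minimal sketch of such a reference pass, assuming a standard transformers causal LM (the actual scripts/playground/reference_hf.py may differ; the model name is a stand-in for [new model]):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Stand-in model name; pass [new model] in practice.
    name = "meta-llama/Meta-Llama-3-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.float16)

    input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
    with torch.no_grad():
        logits = model(input_ids).logits  # shape: [1, seq_len, vocab_size]
    print(logits[0, -1])  # last-position logits to compare against SGLang
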
3 files renamed without changes.

test/__init__.py

Whitespace-only changes.

test/lang/test_bind_cache.py (+6)

@@ -1,3 +1,9 @@
+"""
+Usage:
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
+python3 test_bind_cache.py
+"""
+
 import unittest

 import sglang as sgl

test/lang/test_srt_backend.py (+3 −1)

@@ -1,5 +1,7 @@
 """
-python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
+Usage:
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
+python3 test_srt_backend.py
 """

 import json
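
As the updated docstring notes, these tests expect a local server. A minimal sketch of driving the SRT backend once the server above is running (illustrative only, not the actual test contents):

    import sglang as sgl

    @sgl.function
    def greet(s):
        s += "Say hello in one word:" + sgl.gen("word", max_tokens=8)

    # Point at the server started by sglang.launch_server on port 30000.
    sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
    state = greet.run()
    print(state["word"])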

test/lang/test_tracing.py (+9 −9)

@@ -16,7 +16,7 @@ def few_shot_qa(s, question):
             s += "A:" + sgl.gen("answer", stop="\n")

         tracer = few_shot_qa.trace()
-        print(tracer.last_node.print_graph_dfs() + "\n")
+        # print(tracer.last_node.print_graph_dfs() + "\n")

     def test_select(self):
         @sgl.function
@@ -26,7 +26,7 @@ def capital(s):
             s += "It is a city" + sgl.gen("description", stop=".")

         tracer = capital.trace()
-        print(tracer.last_node.print_graph_dfs() + "\n")
+        # print(tracer.last_node.print_graph_dfs() + "\n")

     def test_raise_warning(self):
         @sgl.function
@@ -66,11 +66,11 @@ def tip_suggestion(s, topic):
             s += "In summary" + sgl.gen("summary")

         compiled = tip_suggestion.compile()
-        compiled.print_graph()
+        # compiled.print_graph()

         sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
         state = compiled.run(topic="staying healthy")
-        print(state.text() + "\n")
+        # print(state.text() + "\n")

         states = compiled.run_batch(
             [
@@ -80,8 +80,8 @@ def tip_suggestion(s, topic):
             ],
             temperature=0,
         )
-        for s in states:
-            print(s.text() + "\n")
+        # for s in states:
+        #     print(s.text() + "\n")

     def test_role(self):
         @sgl.function
@@ -95,7 +95,7 @@ def multi_turn_chat(s):
         backend.chat_template = get_chat_template("llama-2-chat")

         compiled = multi_turn_chat.compile(backend=backend)
-        compiled.print_graph()
+        # compiled.print_graph()

     def test_fork(self):
         @sgl.function
@@ -118,10 +118,10 @@ def tip_suggestion(s):
             s += "In summary" + sgl.gen("summary")

         tracer = tip_suggestion.trace()
-        print(tracer.last_node.print_graph_dfs())
+        # print(tracer.last_node.print_graph_dfs())

         a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
-        print(a.text())
+        # print(a.text())


 if __name__ == "__main__":
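
The change above silences debug prints but keeps the tracing calls themselves. A minimal sketch of the pattern, based on the usage visible in this diff: trace() builds the program's execution graph without running it against a real backend, and print_graph_dfs() renders that graph:

    import sglang as sgl

    @sgl.function
    def qa(s, question):
        s += "Q: " + question + "\n"
        s += "A:" + sgl.gen("answer", stop="\n")

    # Trace without executing against a backend, then dump the graph.
    tracer = qa.trace()
    print(tracer.last_node.print_graph_dfs())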
