
Commit 4075677

Add OpenAI backend to the CI test (#869)
1 parent 9e8d2c7 commit 4075677

16 files changed: +30 −38 lines changed

.github/workflows/pr-e2e-test.yml (+7 −1)

@@ -32,7 +32,13 @@ jobs:
           pip install flashinfer -i https://flashinfer.ai/whl/cu121/torch2.3/ --force-reinstall
           pip install --upgrade transformers

-      - name: Launch server and run benchmark
+      - name: Test OpenAI Backend
+        run: |
+          export OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }}
+          cd sglang/test/lang
+          python3 test_openai_backend.py
+
+      - name: Benchmark Serving
         run: |
           python3 -m sglang.launch_server --model /home/lmzheng/zhyncs/Meta-Llama-3.1-8B-Instruct --port 8413 --disable-radix-cache &
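
The new CI step runs test/lang/test_openai_backend.py with the API key in the environment. A minimal sketch of the kind of program such a test exercises, based on the sglang API used elsewhere in this commit (the exact test contents are not shown here; the prompt and names are illustrative):

    import sglang as sgl

    @sgl.function
    def few_shot_qa(s, question):
        s += "Q: " + question + "\n"
        s += "A:" + sgl.gen("answer", stop="\n", max_tokens=32)

    # Assumes OPENAI_API_KEY is set, as in the CI step above.
    sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
    state = few_shot_qa.run(question="What is the capital of France?")
    print(state["answer"])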

.github/workflows/release-docker.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: publish docker
+name: Release Docker
 on:
   push:
     branches:

.github/workflows/release-fake-tag.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: fake tag
+name: Release Fake Tag
 on:
   push:
     branches:

.github/workflows/release-github.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: release tag
+name: Release GitHub
 on:
   workflow_dispatch:
 jobs:

.github/workflows/release-pypi.yml (+1 −1)

@@ -1,4 +1,4 @@
-name: publish to pypi
+name: Release PyPI
 on:
   push:
     branches:

assets/llama_7b.jpg (−231 KB)

Binary file not shown.

assets/mixtral_8x7b.jpg (−157 KB)

Binary file not shown.

docs/en/benchmark_results.md (−22)

This file was deleted.

docs/en/model_support.md (+1 −1)

@@ -11,6 +11,6 @@ To port a model from vLLM to SGLang, you can compare these two files [SGLang LLa
 - Change `forward()` functions, and add `input_metadata`.
 - Add `EntryClass` at the end.
 - Test correctness by comparing the final logits and outputs of the two following commands:
-  - `python3 playground/reference_hf.py --model [new model]`
+  - `python3 scripts/playground/reference_hf.py --model [new model]`
   - `python3 -m sglang.bench_latency --model [new model] --correct --output-len 16 --trust-remote-code`
 - Update [Supported Models](https://github.com/sgl-project/sglang/tree/main?tab=readme-ov-file#supported-models) at [README](../README.md).
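
The correctness check above compares logits from a plain Hugging Face forward pass against sglang.bench_latency. A minimal sketch of such a reference pass, assuming a standard transformers causal LM (the actual scripts/playground/reference_hf.py may differ; the model name is a stand-in for [new model]):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Stand-in model name; pass [new model] in practice.
    name = "meta-llama/Meta-Llama-3-8B-Instruct"
    tokenizer = AutoTokenizer.from_pretrained(name)
    model = AutoModelForCausalLM.from_pretrained(name, torch_dtype=torch.float16)

    input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
    with torch.no_grad():
        logits = model(input_ids).logits  # shape: [1, seq_len, vocab_size]
    print(logits[0, -1])  # last-position logits to compare against SGLang
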
3 files renamed without changes.

test/__init__.py

Whitespace-only changes.

test/lang/test_bind_cache.py (+6)

@@ -1,3 +1,9 @@
+"""
+Usage:
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
+python3 test_bind_cache.py
+"""
+
 import unittest

 import sglang as sgl

test/lang/test_srt_backend.py (+3 −1)

@@ -1,5 +1,7 @@
 """
-python3 -m sglang.launch_server --model-path meta-llama/Llama-2-7b-chat-hf --port 30000
+Usage:
+python3 -m sglang.launch_server --model-path meta-llama/Meta-Llama-3-8B-Instruct --port 30000
+python3 test_srt_backend.py
 """

 import json
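
As the updated docstring notes, these tests expect a local server. A minimal sketch of driving the SRT backend once the server above is running (illustrative only, not the actual test contents):

    import sglang as sgl

    @sgl.function
    def greet(s):
        s += "Say hello in one word:" + sgl.gen("word", max_tokens=8)

    # Point at the server started by sglang.launch_server on port 30000.
    sgl.set_default_backend(sgl.RuntimeEndpoint("http://localhost:30000"))
    state = greet.run()
    print(state["word"])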

test/lang/test_tracing.py (+9 −9)

@@ -16,7 +16,7 @@ def few_shot_qa(s, question):
             s += "A:" + sgl.gen("answer", stop="\n")

         tracer = few_shot_qa.trace()
-        print(tracer.last_node.print_graph_dfs() + "\n")
+        # print(tracer.last_node.print_graph_dfs() + "\n")

     def test_select(self):
         @sgl.function
@@ -26,7 +26,7 @@ def capital(s):
             s += "It is a city" + sgl.gen("description", stop=".")

         tracer = capital.trace()
-        print(tracer.last_node.print_graph_dfs() + "\n")
+        # print(tracer.last_node.print_graph_dfs() + "\n")

     def test_raise_warning(self):
         @sgl.function
@@ -66,11 +66,11 @@ def tip_suggestion(s, topic):
             s += "In summary" + sgl.gen("summary")

         compiled = tip_suggestion.compile()
-        compiled.print_graph()
+        # compiled.print_graph()

         sgl.set_default_backend(sgl.OpenAI("gpt-3.5-turbo-instruct"))
         state = compiled.run(topic="staying healthy")
-        print(state.text() + "\n")
+        # print(state.text() + "\n")

         states = compiled.run_batch(
             [
@@ -80,8 +80,8 @@ def tip_suggestion(s, topic):
             ],
             temperature=0,
         )
-        for s in states:
-            print(s.text() + "\n")
+        # for s in states:
+        #     print(s.text() + "\n")

     def test_role(self):
         @sgl.function
@@ -95,7 +95,7 @@ def multi_turn_chat(s):
         backend.chat_template = get_chat_template("llama-2-chat")

         compiled = multi_turn_chat.compile(backend=backend)
-        compiled.print_graph()
+        # compiled.print_graph()

     def test_fork(self):
         @sgl.function
@@ -118,10 +118,10 @@ def tip_suggestion(s):
             s += "In summary" + sgl.gen("summary")

         tracer = tip_suggestion.trace()
-        print(tracer.last_node.print_graph_dfs())
+        # print(tracer.last_node.print_graph_dfs())

         a = tip_suggestion.run(backend=sgl.OpenAI("gpt-3.5-turbo-instruct"))
-        print(a.text())
+        # print(a.text())


 if __name__ == "__main__":
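
The change above silences debug prints but keeps the tracing calls themselves. A minimal sketch of the pattern, based on the usage visible in this diff: trace() builds the program's execution graph without running it against a real backend, and print_graph_dfs() renders that graph:

    import sglang as sgl

    @sgl.function
    def qa(s, question):
        s += "Q: " + question + "\n"
        s += "A:" + sgl.gen("answer", stop="\n")

    # Trace without executing against a backend, then dump the graph.
    tracer = qa.trace()
    print(tracer.last_node.print_graph_dfs())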
