sgl-project
diff --git a/.github/workflows/nightly-eval.yml b/.github/workflows/nightly-eval.yml
+4-4
diff --git a/.github/workflows/pr-test.yml b/.github/workflows/pr-test.yml
+2-2
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
+7-6
diff --git a/LICENSE b/LICENSE
+1-1
diff --git a/README.md b/README.md
+1-1
diff --git a/benchmark/lora/lora_bench.py b/benchmark/lora/lora_bench.py
+13-14
diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm
+1-1
@@ -27,14 +27,14 @@ jobs:
           bash scripts/ci_install_dependency.sh
           pip install --upgrade "evalplus[vllm] @ git+https://github.com/evalplus/evalplus"
 
-      - name: Test human eval
+      - name: Test gsm8k
         timeout-minutes: 120
         run: |
           cd test/srt
-          python3 test_nightly_human_eval.py
+          python3 test_nightly_gsm8k_eval.py
 
-      - name: Test gsm8k
+      - name: Test human eval
         timeout-minutes: 120
         run: |
           cd test/srt
-          python3 test_nightly_gsm8k_eval.py
+          python3 test_nightly_human_eval.py
@@ -118,7 +118,7 @@ jobs:
         timeout-minutes: 10
         run: |
           cd test/srt
-          python3 -m unittest test_bench_latency.TestBenchLatency.test_default
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_default
 
       - name: Benchmark online latency
         timeout-minutes: 10
@@ -194,7 +194,7 @@ jobs:
         timeout-minutes: 10
         run: |
           cd test/srt
-          python3 -m unittest test_bench_latency.TestBenchLatency.test_moe_default
+          python3 -m unittest test_bench_one_batch.TestBenchOneBatch.test_moe_default
 
   accuracy-test-1-gpu:
     if: github.repository == 'sgl-project/sglang' || github.event_name == 'pull_request'
 
@@ -1,6 +1,3 @@
-default_language_version:
-    python: python3.9
-
 default_stages: [pre-commit, pre-push, manual]
 
 repos:
@@ -28,7 +25,11 @@ repos:
   - repo: https://github.com/psf/black
     rev: 24.10.0
     hooks:
-      - id: black
-        types: [python]
       - id: black-jupyter
-        types: [jupyter]
+  - repo: https://github.com/kynan/nbstripout
+    rev: 0.8.1
+    hooks:
+      - id: nbstripout
+        args:
+          - '--keep-output'
+          - '--extra-keys=metadata.kernelspec metadata.language_info.version'
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright 2023-2024 SGLang Team
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
 
@@ -37,7 +37,7 @@ The core features include:
 
 - **Fast Backend Runtime**: Provides efficient serving with RadixAttention for prefix caching, jump-forward constrained decoding, continuous batching, token attention (paged attention), tensor parallelism, FlashInfer kernels, chunked prefill, and quantization (INT4/FP8/AWQ/GPTQ).
 - **Flexible Frontend Language**: Offers an intuitive interface for programming LLM applications, including chained generation calls, advanced prompting, control flow, multi-modal inputs, parallelism, and external interactions.
-- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte) and reward models (Skywork), with easy extensibility for integrating new models.
+- **Extensive Model Support**: Supports a wide range of generative models (Llama, Gemma, Mistral, QWen, DeepSeek, LLaVA, etc.), embedding models (e5-mistral, gte, mcdse) and reward models (Skywork), with easy extensibility for integrating new models.
 - **Active Community**: SGLang is open-source and backed by an active community with industry adoption.
 
 ## Getting Started
 
@@ -1,17 +1,16 @@
-"""
-Copyright 2023-2024 SGLang Team
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
+# Copyright 2023-2024 SGLang Team
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
 
 import argparse
 import asyncio
 
@@ -1,5 +1,5 @@
 # Usage (to build SGLang ROCm docker image):
-#   docker build --build-arg SGL_BRANCH=v0.3.5.post2 -t testImage -f Dockerfile.rocm .
+#   docker build --build-arg SGL_BRANCH=v0.3.6 -t testImage -f Dockerfile.rocm .
 
 # default base image
 ARG BASE_IMAGE="rocm/vllm-dev:20241022"