lightseekorg · key4ng · May 8, 2026 · May 9, 2026 · May 9, 2026 · May 9, 2026
@@ -0,0 +1,13 @@
+name: 'Setup TokenSpeed Backend'
+description: 'Create Python venv and install TokenSpeed (engine + kernel + scheduler) from source.'
+
+runs:
+  using: 'composite'  # composite action: the steps below run inside the calling job
+  steps:
+    - name: Setup Python venv
+      shell: bash  # composite `run` steps must declare their shell explicitly
+      run: bash scripts/ci_setup_python_venv.sh  # relative path; presumably resolved from the checked-out repo root — confirm caller checks out first
+
+    - name: Install TokenSpeed
+      shell: bash
+      run: bash scripts/ci_install_tokenspeed.sh  # runs after the venv step; presumably installs into that venv — confirm in the script
@@ -6,7 +6,7 @@ on:
       engine:
         required: true
         type: string
-        description: "Engine to test: sglang, vllm, or trtllm"
+        description: "Engine to test: sglang, vllm, trtllm, or tokenspeed"
       gpu_tier:
         required: true
         type: string
@@ -68,6 +68,10 @@ jobs:
         if: inputs.engine == 'trtllm'
         uses: ./.github/actions/setup-trtllm
 
+      - name: Setup TokenSpeed backend
+        if: inputs.engine == 'tokenspeed'
+        uses: ./.github/actions/setup-tokenspeed
+
       # Artifact downloads
       - name: Download wheel artifact
         uses: actions/download-artifact@v8

@@ -390,6 +390,7 @@ jobs:
               - 'scripts/ci_setup_python_venv.sh'
               - 'scripts/ci_install_sglang.sh'
               - 'scripts/ci_install_vllm.sh'
+              - 'scripts/ci_install_tokenspeed.sh'
               - 'scripts/ci_install_e2e_deps.sh'
               - 'scripts/ci_killall_sglang.sh'
               - 'scripts/ci_build_wheel.sh'
@@ -404,6 +405,7 @@ jobs:
               - 'e2e_test/router/**'
               - 'scripts/ci_install_vllm.sh'
               - 'scripts/ci_install_trtllm.sh'
+              - 'scripts/ci_install_tokenspeed.sh'
             agentic:
               - 'crates/mcp/**'
               - 'crates/data_connector/**'
@@ -445,6 +447,10 @@ jobs:
             timeout: 20
           - engine: trtllm
             timeout: 90
+          # TokenSpeed builds kernel (CUDA) + scheduler (C++/CMake) from
+          # source, so first run takes ~30 min; cached runs are faster.
+          - engine: tokenspeed
+            timeout: 60
     uses: ./.github/workflows/e2e-gpu-job.yml
     with:
       engine: ${{ matrix.engine }}
@@ -555,6 +561,11 @@ jobs:
             timeout: 20
           - engine: trtllm
             timeout: 30
+          # Runs TestChatCompletionGptOss (gpt-oss-20b, `@pytest.mark.gpu(2)`) on the
+          # tokenspeed engine. The 1-GPU job also collects this test class, but
+          # pytest skips it at collection there because that runner has only 1 GPU.
+          - engine: tokenspeed
+            timeout: 60
     uses: ./.github/workflows/e2e-gpu-job.yml
     with:
       engine: ${{ matrix.engine }}