moved llama benchmark, sglang benchmark, sglang integration, and sdxl to ossci cluster (#971)

Eliasj42 · Elias Joseph · saienduri · web-flow · commit 80674bce3407 · 2025-02-17T16:58:13.000-08:00
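Move the llama benchmark, sglang benchmark, sglang integration, and sdxl
workflows to the ossci cluster (runner label `linux-mi300-1gpu-ossci`).
The sglang tests now set ROCR_VISIBLE_DEVICES to "0" instead of "1".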

---------

Signed-off-by: Elias Joseph <eljoseph@amd.com>
Co-authored-by: Elias Joseph <eljoseph@amd.com>
Co-authored-by: saienduri <saimanas.enduri@amd.com>
diff --git a/.github/workflows/ci-llama-large-tests.yaml b/.github/workflows/ci-llama-large-tests.yaml
@@ -28,7 +28,7 @@ jobs:
       matrix:
         version: [3.11]
       fail-fast: false
-    runs-on: llama-mi300x-1
+    runs-on: linux-mi300-1gpu-ossci
     defaults:
       run:
         shell: bash
diff --git a/.github/workflows/ci-sdxl.yaml b/.github/workflows/ci-sdxl.yaml
@@ -37,7 +37,7 @@ env:
 jobs:
   install-and-test:
     name: Install and test
-    runs-on: mi300x-3
+    runs-on: linux-mi300-1gpu-ossci
 
     steps:
     - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
diff --git a/.github/workflows/ci-sglang-benchmark.yml b/.github/workflows/ci-sglang-benchmark.yml
@@ -40,7 +40,7 @@ jobs:
       matrix:
         version: [3.11]
       fail-fast: false
-    runs-on: mi300x-3
+    runs-on: linux-mi300-1gpu-ossci
     defaults:
       run:
         shell: bash
@@ -82,7 +82,9 @@ jobs:
 
       - name: Login to huggingface
         continue-on-error: true
-        run: huggingface-cli login --token ${{ secrets.HF_TOKEN }}
+        run: |
+          pip install -U "huggingface_hub[cli]"
+          huggingface-cli login --token ${{ secrets.HF_TOKEN }}
 
       - name: Run Shortfin Benchmark Tests
         run: |
@@ -101,7 +103,7 @@ jobs:
       matrix:
         version: [3.11]
       fail-fast: false
-    runs-on: mi300x-3
+    runs-on: linux-mi300-1gpu-ossci
     defaults:
       run:
         shell: bash
@@ -187,15 +189,11 @@ jobs:
     needs: benchmark_sglang
     name: "Docker Cleanup"
     if: always()
-    runs-on: mi300x-3
+    runs-on: linux-mi300-1gpu-ossci
     steps:
       - name: Stop sglang-server
         run: docker stop sglang-server || true # Stop container if it's running
 
-      # Deleting image after run due to large disk space requirement (83 GB)
-      - name: Cleanup SGLang Image
-        run: docker image rm lmsysorg/sglang:v0.3.5.post1-rocm620
-
   merge_and_upload_reports:
     name: "Merge and upload benchmark reports"
     needs: [benchmark_shortfin, benchmark_sglang]
diff --git a/.github/workflows/ci-sglang-integration-tests.yml b/.github/workflows/ci-sglang-integration-tests.yml
@@ -29,7 +29,7 @@ jobs:
       matrix:
         version: [3.11]
       fail-fast: false
-    runs-on: mi300x-3
+    runs-on: linux-mi300-1gpu-ossci
     defaults:
       run:
         shell: bash
@@ -69,7 +69,6 @@ jobs:
           pip install sentence_transformers
 
           pip freeze
-
       - name: Run Integration Tests
         run: |
           source ${VENV_DIR}/bin/activate
diff --git a/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py b/app_tests/benchmark_tests/llm/sglang_benchmarks/shortfin_benchmark_test.py
@@ -60,7 +60,7 @@ def test_shortfin_benchmark(
     request,
 ):
     # TODO: Remove when multi-device is fixed
-    os.environ["ROCR_VISIBLE_DEVICES"] = "1"
+    os.environ["ROCR_VISIBLE_DEVICES"] = "0"
 
     process, port = server
 
diff --git a/app_tests/integration_tests/llm/sglang/conftest.py b/app_tests/integration_tests/llm/sglang/conftest.py
@@ -54,7 +54,7 @@ def model_artifacts(request, tmp_path_factory):
 
 @pytest.fixture(scope="module")
 def start_server(request, model_artifacts):
-    os.environ["ROCR_VISIBLE_DEVICES"] = "1"
+    os.environ["ROCR_VISIBLE_DEVICES"] = "0"
     device_settings = request.param["device_settings"]
 
     server_config = ServerConfig(