nod-ai · renxida · Mar 6, 2025 · Mar 4, 2025 · Mar 4, 2025 · Mar 4, 2025
diff --git a/.github/workflows/pkgci_shark_ai.yml b/.github/workflows/pkgci_shark_ai.yml
@@ -28,7 +28,7 @@ jobs:
       matrix:
         include:
           - name: cpu
-            runs-on: azure-cpubuilder-linux-scale
+            runs-on: ubuntu-24.04
             test_device: cpu
             python-version: 3.11
           - name: amdgpu_rocm_mi300_gfx942

diff --git a/app_tests/integration_tests/llm/shortfin/meta_llama31_8b_llm_server_test.py b/app_tests/integration_tests/llm/shortfin/meta_llama31_8b_llm_server_test.py
@@ -64,7 +64,13 @@ def test_basic_generation_input_ids(
                 message=f"Generation did not match expected pattern.\nExpected to start with: {expected_prefix}\nActual response: {response}",
             )
 
-    @pytest.mark.parametrize("concurrent_requests", [2, 4, 8])
+    @pytest.mark.parametrize(
+        "concurrent_requests",
+        [
+            2,
+            4,
+        ],
+    )
     def test_concurrent_generation(
         self, server: tuple[Any, int], concurrent_requests: int
     ) -> None:

diff --git a/app_tests/integration_tests/llm/shortfin/open_llama_3b_llm_server_test.py b/app_tests/integration_tests/llm/shortfin/open_llama_3b_llm_server_test.py
@@ -64,7 +64,7 @@ def test_basic_generation_input_ids(
                 message=f"Generation did not match expected pattern.\nExpected to start with: {expected_prefix}\nActual response: {response}",
             )
 
-    @pytest.mark.parametrize("concurrent_requests", [2, 4, 8])
+    @pytest.mark.parametrize("concurrent_requests", [2, 4])
     def test_concurrent_generation(
         self, server: tuple[Any, int], concurrent_requests: int
     ) -> None:

diff --git a/app_tests/integration_tests/llm/shortfin/tinystories_llama2_25m_test.py b/app_tests/integration_tests/llm/shortfin/tinystories_llama2_25m_test.py
@@ -51,7 +51,13 @@ def test_basic_generation(self, server: tuple[Any, int]) -> None:
                 message=f"Generation did not match expected pattern.\nExpected to start with: {expected_prefix}\nActual response: {response}",
             )
 
-    @pytest.mark.parametrize("concurrent_requests", [2, 4, 8])
+    @pytest.mark.parametrize(
+        "concurrent_requests",
+        [
+            2,
+            4,
+        ],
+    )
     def test_concurrent_generation(
         self, server: tuple[Any, int], concurrent_requests: int
     ) -> None: