flagos-ai · tengqm · Apr 4, 2026 · Apr 3, 2026
diff --git a/.github/workflows/backend-test.yaml b/.github/workflows/backend-test.yaml
@@ -26,14 +26,9 @@ on:
         required: false
         type: string
         default: ''
-
-# NOTE: The following environment variables are for CI runners to
-# mitigate the network connection problems. However, this may not work
-# if the PR is triggered from a forked repository because in that case
-# GitHub doesn't allow the workflow to access any secrets.
-# env:
-#  http_proxy: ${{ secrets.HTTP_PROXY }}
-#  https_proxy: ${{ secrets.HTTPS_PROXY }}
+    secrets:
+      RUNNER_SSH_KEY:
+        required: true
 
 permissions:
   contents: read

diff --git a/.github/workflows/unittest.yaml b/.github/workflows/unittest.yaml
@@ -113,6 +113,8 @@ jobs:
       runner_label: h20
       gpu_check_script: tools/gpu_check.sh
       test_script: tools/run_backend_tests_nvidia.sh
+    secrets:
+      RUNNER_SSH_KEY: ${{ secrets.RUNNER_SSH_KEY }}
 
   backend-ascend:
     needs: preprocess
@@ -123,16 +125,20 @@ jobs:
       runner_label: ascend_CI
       gpu_check_script: tools/gpu_check_ascend.sh
       test_script: tools/run_backend_tests.sh
+    secrets:
+      RUNNER_SSH_KEY: ${{ secrets.RUNNER_SSH_KEY }}
 
   backend-iluvatar:
     needs: preprocess
     if: contains(needs.preprocess.outputs.labels, 'vendor/Iluvatar')
     uses: ./.github/workflows/backend-test.yaml
     with:
       vendor: iluvatar
-      runner_label: iluvatar_CI
+      runner_label: iluvatar
       gpu_check_script: tools/gpu_check_iluvatar.sh
       test_script: tools/run_backend_tests.sh
+    secrets:
+      RUNNER_SSH_KEY: ${{ secrets.RUNNER_SSH_KEY }}
 
   backend-metax:
     needs: preprocess
@@ -142,6 +148,8 @@ jobs:
       vendor: metax
       runner_label: metax_CI
       test_script: tools/run_backend_tests_metax.sh
+    secrets:
+      RUNNER_SSH_KEY: ${{ secrets.RUNNER_SSH_KEY }}
 
   backend-moore:
     needs: preprocess
@@ -152,6 +160,8 @@ jobs:
       runner_label: moore_CI
       gpu_check_script: tools/gpu_check_moore.sh
       test_script: tools/run_backend_tests.sh
+    secrets:
+      RUNNER_SSH_KEY: ${{ secrets.RUNNER_SSH_KEY }}
 
   # TODO(Qiming): This job doesn't require an nvidia backend, the generic
   # test-op workflow should be fine.
@@ -165,3 +175,5 @@ jobs:
       gpu_check_script: tools/gpu_check.sh
       test_script: tools/test-op-experimental.sh
       changed_files: ${{ needs.preprocess.outputs.changed_files }}
+    secrets:
+      RUNNER_SSH_KEY: ${{ secrets.RUNNER_SSH_KEY }}
diff --git a/pyproject.toml b/pyproject.toml
@@ -62,6 +62,14 @@ official_torch_270 = [
     "torchaudio==2.7.0",
 ]
 
+# NOTE: It turned out that Iluvatar requires the native CUDA toolkit 10.2, so the cu12 wheels below stay disabled.
+# cuda_runtime = [
+#     "nvidia-cublas-cu12",
+#     "nvidia-cuda-runtime-cu12",
+#     "nvidia-cuda-nvrtc-cu12",
+#     "nvidia-cudnn-cu12",
+# ]
+
 nvidia = [
     "flag_gems[official_torch_290]",
 ]

diff --git a/src/flag_gems/runtime/backend/_iluvatar/op_black_list.yaml b/src/flag_gems/runtime/backend/_iluvatar/op_black_list.yaml
@@ -2,14 +2,17 @@
 # Description: List of operators unsupported (FALSE) in the current environment
 # test based: 2026-03-11
 # Source: Operator Compatibility Test Table
-
+#
 unsupported_operators:
   - name: grouped_topk
-    reason: "All dtypes failed"
+    reason: All dtypes failed
 
   - name: topk_softmax
-    reason: "vllm not supported"
+    reason: vllm not supported
 
 summary:
   total_unsupported: 2
-  note: "This list is based on items explicitly marked as FALSE in the provided table. Certain operators (e.g., conv1d, index_add) have blank entries in some columns and are therefore excluded; their support status requires further verification."
+  note: >-  # folded + stripped: keeps the note a single-line string, as it was before
+    This list is based on items explicitly marked as FALSE in the provided table.
+    Certain operators (e.g., `conv1d`, `index_add`) have blank entries in some columns and
+    are therefore excluded; their support status requires further verification.
diff --git a/src/flag_gems/runtime/backend/_iluvatar/ops/div.py b/src/flag_gems/runtime/backend/_iluvatar/ops/div.py
@@ -6,6 +6,7 @@
 
 from flag_gems.utils import pointwise_dynamic, tl_extra_shim
 
+# TODO: Verify that instantiating the logger at module level is appropriate here.
 logger = logging.getLogger(__name__)
 div_rn = tl_extra_shim.div_rn
 div_rz = tl_extra_shim.div_rz

diff --git a/tools/gpu_check_iluvatar.sh b/tools/gpu_check_iluvatar.sh
@@ -4,6 +4,10 @@
 memory_usage_max=30000     # Maximum memory usage limit (MB)
 sleep_time=120             # Wait time (seconds), default is 2 minutes
 
+# Expose the CoreX libraries to the dynamic loader. The previous value is
+# appended only when it is non-empty, so no empty search-path entry is created.
+export LD_LIBRARY_PATH="/usr/local/corex/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+
 # Get the number of GPUs
 gpu_count=$(ixsmi --query-gpu=name --format=csv,noheader 2>/dev/null | wc -l)
 

diff --git a/tools/run_backend_tests_iluvatar.sh b/tools/run_backend_tests_iluvatar.sh
@@ -3,12 +3,30 @@
 VENDOR=${1:?"Usage: bash tools/run_backend_tests_iluvatar.sh <vendor>"}
 export GEMS_VENDOR=$VENDOR
 
-source tools/run_command.sh
-
 echo "Running FlagGems tests with GEMS_VENDOR=$VENDOR"
 
-run_command python3 -m pytest -s tests/test_tensor_constructor_ops.py
-run_command python3 -m pytest -s tests/test_shape_utils.py
-run_command python3 -m pytest -s tests/test_tensor_wrapper.py
-run_command python3 -m pytest -s tests/test_pointwise_dynamic.py
-run_command python3 -m pytest -s tests/test_distribution_ops.py
+# Make the Iluvatar CoreX libraries and the CUDA compat shims visible to the
+# dynamic loader. Append the previous value only when it is non-empty so that
+# no empty entry (which the loader treats as the current directory) sneaks in.
+export LD_LIBRARY_PATH="/usr/local/corex-4.4.0/lib:/usr/local/cuda/compat${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
+echo "$LD_LIBRARY_PATH"
+
+# Put pyenv on PATH and initialize it for this shell.
+export PYENV_ROOT="$HOME/.pyenv"
+export PATH="$PYENV_ROOT/bin:$PATH"
+eval "$(pyenv init - bash)"
+
+# From here on, stop at the first failing command: running the test suite
+# against a half-built environment would only produce misleading results.
+set -e
+
+# Build an isolated virtual environment with uv and install the package.
+pip install -U pip
+pip install uv
+uv venv
+source .venv/bin/activate
+uv pip install setuptools==82.0.1 scikit-build-core==0.12.2 pybind11==3.0.3 cmake==3.31.10 ninja==1.13.0
+# Quote the extras spec so the shell never treats the brackets as a glob.
+uv pip install -e ".[iluvatar,test]"
+
+pytest -s tests