diff --git a/skyrl/train/utils/utils.py b/skyrl/train/utils/utils.py
index 94431caa9a..35984656d5 100644
--- a/skyrl/train/utils/utils.py
+++ b/skyrl/train/utils/utils.py
@@ -605,6 +605,9 @@ def prepare_runtime_environment(cfg: SkyRLTrainConfig) -> dict[str, str]:
     # TODO(sumanthrh): introduce a debug mode and add debugging flags like `CUDA_LAUNCH_BLOCKING` here
     env_vars = {}
 
+    # Manually enable VLLM_USE_RAY_V2_EXECUTOR_BACKEND for every run, for testing.
+    env_vars["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1"
+
     # NOTE (erictang000): This should no longer be required since this has been removed in vllm
     # and fixed in NCCL (https://github.com/vllm-project/vllm/pull/24141, https://github.com/NVIDIA/nccl/issues/1234), but empirically seeing OOMs for
     # that previously ran successfully, so keeping this to maintain backwards compatibility.
diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py b/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py
index cdfbd4a0f3..97dc8744e6 100644
--- a/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py
+++ b/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py
@@ -21,6 +21,7 @@ def _build_ray_env_vars():
         "VLLM_USE_V1": "1",
         "VLLM_ENABLE_V1_MULTIPROCESSING": "0",
         "VLLM_ALLOW_INSECURE_SERIALIZATION": "1",
+        "VLLM_USE_RAY_V2_EXECUTOR_BACKEND": "1",
         "_SKYRL_USE_NEW_INFERENCE": "1" if _SKYRL_USE_NEW_INFERENCE else "0",
     }