diff --git a/skyrl/train/utils/utils.py b/skyrl/train/utils/utils.py index 94431caa9a..35984656d5 100644 --- a/skyrl/train/utils/utils.py +++ b/skyrl/train/utils/utils.py @@ -605,6 +605,9 @@ def prepare_runtime_environment(cfg: SkyRLTrainConfig) -> dict[str, str]: # TODO(sumanthrh): introduce a debug mode and add debugging flags like `CUDA_LAUNCH_BLOCKING` here env_vars = {} + # manually set this for testing everywhere + env_vars["VLLM_USE_RAY_V2_EXECUTOR_BACKEND"] = "1" + # NOTE (erictang000): This should no longer be required since this has been removed in vllm # and fixed in NCCL (https://github.com/vllm-project/vllm/pull/24141, https://github.com/NVIDIA/nccl/issues/1234), but empirically seeing OOMs for # that previously ran successfully, so keeping this to maintain backwards compatibility. diff --git a/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py b/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py index cdfbd4a0f3..97dc8744e6 100644 --- a/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py +++ b/tests/backends/skyrl_train/gpu/gpu_ci/conftest.py @@ -21,6 +21,7 @@ def _build_ray_env_vars(): "VLLM_USE_V1": "1", "VLLM_ENABLE_V1_MULTIPROCESSING": "0", "VLLM_ALLOW_INSECURE_SERIALIZATION": "1", + "VLLM_USE_RAY_V2_EXECUTOR_BACKEND": "1", "_SKYRL_USE_NEW_INFERENCE": "1" if _SKYRL_USE_NEW_INFERENCE else "0", }