diff --git a/tests/integration/launch_container.sh b/tests/integration/launch_container.sh
index 48733842e..17e7ce196 100755
--- a/tests/integration/launch_container.sh
+++ b/tests/integration/launch_container.sh
@@ -84,10 +84,7 @@ if [[ "$(support_nvme)" == *"true"* ]]; then
 fi
 
 is_llm=false
-if [[ "$platform" == *"-gpu"* ]]; then # if the platform has cuda capabilities
-  runtime="nvidia"
-elif [[ "$platform" == *"lmi"* || "$platform" == *"trtllm"* || "$platform" == *"tensorrt-llm"* ]]; then # Runs multi-gpu
-  runtime="nvidia"
+if [[ "$platform" == *"lmi"* || "$platform" == *"trtllm"* || "$platform" == *"tensorrt-llm"* ]]; then # Runs multi-gpu
   is_llm=true
   if [[ "$(is_p4d_or_p5)" == *"true"* || $is_multi_node ]]; then
     shm="20gb"
@@ -157,7 +154,6 @@ if $is_multi_node; then
     -e DJL_LEADER_ADDR=${leader_hostname} \
     -e DJL_WORKER_ADDR_FORMAT="${LWS_NAME}-${GROUP_INDEX}-%d.${LWS_NAME}.${NAMESPACE}" \
     ${env_file} \
-    ${runtime:+--runtime="${runtime}"} \
     ${shm:+--shm-size="${shm}"} \
     ${host_device:+ ${host_device}} \
     "${docker_image}" "service ssh start; djl-serving"
@@ -179,7 +175,6 @@ if $is_multi_node; then
     -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
     -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
     ${env_file} \
-    ${runtime:+--runtime="${runtime}"} \
     ${shm:+--shm-size="${shm}"} \
     ${host_device:+ ${host_device}} \
     "${docker_image}" "service ssh start; /usr/bin/python3 /opt/djl/partition/run_multi_node_setup.py 2>&1 | tee /opt/djl/logs/lmi-worker.log; tail -f"
@@ -207,7 +202,6 @@ elif $is_sm_neo_context; then
     -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
     -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
     ${env_file} \
-    ${runtime:+--runtime="${runtime}"} \
     ${shm:+--shm-size="${shm}"} \
     ${host_device:+ ${host_device}} \
     "${docker_image}"
@@ -228,7 +222,6 @@ elif $is_partition; then
     -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
     -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
     -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
-    ${runtime:+--runtime="${runtime}"} \
     ${shm:+--shm-size="${shm}"} \
     ${host_device:+ ${host_device}} \
     "${docker_image}" \
@@ -247,7 +240,6 @@ elif [[ "$docker_image" == *"text-generation-inference"* ]]; then
     -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID \
     -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
     -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
-    ${runtime:+--runtime="${runtime}"} \
     ${shm:+--shm-size="${shm}"} \
     "${docker_image}" \
     ${args})
@@ -272,7 +264,6 @@ else
     -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY \
     -e AWS_SESSION_TOKEN=$AWS_SESSION_TOKEN \
     $uid_mapping \
-    ${runtime:+--runtime="${runtime}"} \
     ${shm:+--shm-size="${shm}"} \
     ${host_device:+ ${host_device}} \
     "${docker_image}" \
diff --git a/tests/integration/llm/prepare.py b/tests/integration/llm/prepare.py
index 8a81a7000..ff858033d 100644
--- a/tests/integration/llm/prepare.py
+++ b/tests/integration/llm/prepare.py
@@ -458,6 +458,7 @@
         "option.trust_remote_code": True,
         "option.max_model_len": 16384,
         "option.gpu_memory_utilization": "0.9",
+        "option.enable_expert_parallel": True,
     },
     "llama-4-scout-17b-16e-instruct": {
         "option.model_id": "s3://djl-llm/Llama-4-Scout-17B-16E-Instruct/",
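
Note (not part of the patch): the deleted `${runtime:+--runtime="${runtime}"}` arguments use bash's `:+` parameter expansion, which emits the flag only when `runtime` is set and non-empty. Once the `runtime="nvidia"` assignments are gone, each expansion already contributes nothing to the `docker run` command line, so removing the expansion lines too just deletes dead code. A minimal sketch of the idiom, using a hypothetical image name:

    # ${var:+word} expands to "word" if var is set and non-empty, else to nothing
    runtime=""
    echo docker run ${runtime:+--runtime="${runtime}"} my-image
    # -> docker run my-image
    runtime="nvidia"
    echo docker run ${runtime:+--runtime="${runtime}"} my-image
    # -> docker run --runtime=nvidia my-image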