diff --git a/.github/workflows/ci-pr-benchmark.yaml b/.github/workflows/ci-pr-benchmark.yaml index 0899f600..8f919c36 100644 --- a/.github/workflows/ci-pr-benchmark.yaml +++ b/.github/workflows/ci-pr-benchmark.yaml @@ -37,7 +37,7 @@ jobs: - name: Standup a modelservice using llm-d-inference-sim env: - LLMDBENCH_HF_TOKEN: hf-token-placeholder + LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }} run: | ./setup/standup.sh -c kind_modelservice_inference-sim -t modelservice -s 0,1,2,4,7,8,9 diff --git a/scenarios/kind_modelservice_inference-sim.sh b/scenarios/kind_modelservice_inference-sim.sh index 7147f560..be2663d3 100644 --- a/scenarios/kind_modelservice_inference-sim.sh +++ b/scenarios/kind_modelservice_inference-sim.sh @@ -14,8 +14,9 @@ export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_NR=0 export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_NR=0 export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_MEM=100Mi export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_MEM=100Mi -export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL="hf" +# export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL="hf" export LLMDBENCH_DEPLOY_MODEL_LIST="facebook/opt-125m" export LLMDBENCH_HARNESS_PVC_SIZE=3Gi export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL=true - +export LLMDBENCH_VLLM_COMMON_PVC_ACCESSMODE=ReadWriteOnce +export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=1Gi diff --git a/setup/env.sh b/setup/env.sh index 320a6b16..360889c5 100644 --- a/setup/env.sh +++ b/setup/env.sh @@ -65,6 +65,7 @@ export LLMDBENCH_VLLM_COMMON_BLOCK_SIZE=${LLMDBENCH_VLLM_COMMON_BLOCK_SIZE:-64} export LLMDBENCH_VLLM_COMMON_MAX_NUM_BATCHED_TOKENS=${LLMDBENCH_VLLM_COMMON_MAX_NUM_BATCHED_TOKENS:-4096} export LLMDBENCH_VLLM_COMMON_PVC_NAME=${LLMDBENCH_VLLM_COMMON_PVC_NAME:-"model-pvc"} export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS="${LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS:-default}" +export LLMDBENCH_VLLM_COMMON_PVC_ACCESSMODE=${LLMDBENCH_VLLM_COMMON_PVC_ACCESSMODE:-ReadWriteMany} export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE="${LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE:-300Gi}" export LLMDBENCH_VLLM_COMMON_PVC_DOWNLOAD_TIMEOUT=${LLMDBENCH_VLLM_COMMON_PVC_DOWNLOAD_TIMEOUT:-"2400"} export LLMDBENCH_VLLM_COMMON_HF_TOKEN_KEY="${LLMDBENCH_VLLM_COMMON_HF_TOKEN_KEY:-"HF_TOKEN"}" diff --git a/setup/functions.py b/setup/functions.py index 44f3f967..a5d50f58 100644 --- a/setup/functions.py +++ b/setup/functions.py @@ -251,6 +251,7 @@ def validate_and_create_pvc( pvc_name: str, pvc_size: str, pvc_class: str, + pvc_access_mode: str = 'ReadWriteMany', dry_run: bool = False ): announce("Provisioning model storage…") @@ -294,7 +295,7 @@ def validate_and_create_pvc( "namespace": namespace, }, "spec": { - "accessModes": ["ReadWriteMany"], + "accessModes": [pvc_access_mode], "resources": { "requests": {"storage": pvc_size} }, diff --git a/setup/steps/04_ensure_model_namespace_prepared.py b/setup/steps/04_ensure_model_namespace_prepared.py index ee1055f5..b33dbf3e 100644 --- a/setup/steps/04_ensure_model_namespace_prepared.py +++ b/setup/steps/04_ensure_model_namespace_prepared.py @@ -73,12 +73,6 @@ def main(): ev = {} environment_variable_to_dict(ev) - env_cmd=f'source "{ev["control_dir"]}/env.sh"' - result = llmdbench_execute_cmd(actual_cmd=env_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"]) - if result != 0: - announce(f"❌ Failed while running \"{env_cmd}\" (exit code: {result})") - exit(result) - api = kube_connect(f'{ev["control_work_dir"]}/environment/context.ctx') if ev["control_dry_run"] : announce("DRY RUN enabled. No actual changes will be made.") @@ -110,6 +104,10 @@ def main(): protocol, pvc_and_model_path = model_artifact_uri.split("://") # protocol var unused but exists in prev script pvc_name, model_path = pvc_and_model_path.split('/', 1) # split from first occurence + announce(f'pvc_name = {ev["vllm_common_pvc_name"]}') + announce(f'pvc_size = {ev["vllm_common_pvc_model_cache_size"]}') + announce(f'pvc_class = {ev["vllm_common_pvc_storage_class"]}') + announce(f'pvc_access_mode = {ev["vllm_common_pvc_accessmode"]}') validate_and_create_pvc( api=api, namespace=ev["vllm_common_namespace"], @@ -117,6 +115,7 @@ def main(): pvc_name=ev["vllm_common_pvc_name"], pvc_size=ev["vllm_common_pvc_model_cache_size"], pvc_class=ev["vllm_common_pvc_storage_class"], + pvc_access_mode=ev["vllm_common_pvc_accessmode"], dry_run=ev["control_dry_run"] )