Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-pr-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ jobs:

- name: Standup a modelservice using llm-d-inference-sim
env:
LLMDBENCH_HF_TOKEN: hf-token-placeholder
LLMDBENCH_HF_TOKEN: ${{ secrets.LLMDBENCH_HF_TOKEN }}
run: |
./setup/standup.sh -c kind_modelservice_inference-sim -t modelservice -s 0,1,2,4,7,8,9

Expand Down
5 changes: 3 additions & 2 deletions scenarios/kind_modelservice_inference-sim.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_NR=0
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_NR=0
export LLMDBENCH_VLLM_MODELSERVICE_DECODE_CPU_MEM=100Mi
export LLMDBENCH_VLLM_MODELSERVICE_PREFILL_CPU_MEM=100Mi
export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL="hf"
# export LLMDBENCH_VLLM_MODELSERVICE_URI_PROTOCOL="hf"
export LLMDBENCH_DEPLOY_MODEL_LIST="facebook/opt-125m"
export LLMDBENCH_HARNESS_PVC_SIZE=3Gi
export LLMDBENCH_VLLM_MODELSERVICE_INFERENCE_MODEL=true

export LLMDBENCH_VLLM_COMMON_PVC_ACCESSMODE=ReadWriteOnce
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE=1Gi
1 change: 1 addition & 0 deletions setup/env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ export LLMDBENCH_VLLM_COMMON_BLOCK_SIZE=${LLMDBENCH_VLLM_COMMON_BLOCK_SIZE:-64}
export LLMDBENCH_VLLM_COMMON_MAX_NUM_BATCHED_TOKENS=${LLMDBENCH_VLLM_COMMON_MAX_NUM_BATCHED_TOKENS:-4096}
export LLMDBENCH_VLLM_COMMON_PVC_NAME=${LLMDBENCH_VLLM_COMMON_PVC_NAME:-"model-pvc"}
export LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS="${LLMDBENCH_VLLM_COMMON_PVC_STORAGE_CLASS:-default}"
export LLMDBENCH_VLLM_COMMON_PVC_ACCESSMODE=${LLMDBENCH_VLLM_COMMON_PVC_ACCESSMODE:-ReadWriteMany}
export LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE="${LLMDBENCH_VLLM_COMMON_PVC_MODEL_CACHE_SIZE:-300Gi}"
export LLMDBENCH_VLLM_COMMON_PVC_DOWNLOAD_TIMEOUT=${LLMDBENCH_VLLM_COMMON_PVC_DOWNLOAD_TIMEOUT:-"2400"}
export LLMDBENCH_VLLM_COMMON_HF_TOKEN_KEY="${LLMDBENCH_VLLM_COMMON_HF_TOKEN_KEY:-"HF_TOKEN"}"
Expand Down
3 changes: 2 additions & 1 deletion setup/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,7 @@ def validate_and_create_pvc(
pvc_name: str,
pvc_size: str,
pvc_class: str,
pvc_access_mode: str = 'ReadWriteMany',
dry_run: bool = False
):
announce("Provisioning model storage…")
Expand Down Expand Up @@ -294,7 +295,7 @@ def validate_and_create_pvc(
"namespace": namespace,
},
"spec": {
"accessModes": ["ReadWriteMany"],
"accessModes": [pvc_access_mode],
"resources": {
"requests": {"storage": pvc_size}
},
Expand Down
11 changes: 5 additions & 6 deletions setup/steps/04_ensure_model_namespace_prepared.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,6 @@ def main():
ev = {}
environment_variable_to_dict(ev)

env_cmd=f'source "{ev["control_dir"]}/env.sh"'
result = llmdbench_execute_cmd(actual_cmd=env_cmd, dry_run=ev["control_dry_run"], verbose=ev["control_verbose"])
if result != 0:
announce(f"❌ Failed while running \"{env_cmd}\" (exit code: {result})")
exit(result)

api = kube_connect(f'{ev["control_work_dir"]}/environment/context.ctx')
if ev["control_dry_run"] :
announce("DRY RUN enabled. No actual changes will be made.")
Expand Down Expand Up @@ -110,13 +104,18 @@ def main():
protocol, pvc_and_model_path = model_artifact_uri.split("://") # protocol var unused but exists in prev script
pvc_name, model_path = pvc_and_model_path.split('/', 1) # split from first occurrence

announce(f'pvc_name = {ev["vllm_common_pvc_name"]}')
announce(f'pvc_size = {ev["vllm_common_pvc_model_cache_size"]}')
announce(f'pvc_class = {ev["vllm_common_pvc_storage_class"]}')
announce(f'pvc_access_mode = {ev["vllm_common_pvc_accessmode"]}')
validate_and_create_pvc(
api=api,
namespace=ev["vllm_common_namespace"],
download_model=download_model,
pvc_name=ev["vllm_common_pvc_name"],
pvc_size=ev["vllm_common_pvc_model_cache_size"],
pvc_class=ev["vllm_common_pvc_storage_class"],
pvc_access_mode=ev["vllm_common_pvc_accessmode"],
dry_run=ev["control_dry_run"]
)

Expand Down
Loading