70 changes: 70 additions & 0 deletions .github/configs/amd-master.yaml
@@ -176,3 +176,73 @@ gptoss-fp4-mi355x-vllm:
- { tp: 1, conc-start: 4, conc-end: 128 }
- { tp: 4, conc-start: 4, conc-end: 4 }
- { tp: 8, conc-start: 4, conc-end: 8 }

dsr1-fp4-mi355x-atom:
image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
model: amd/DeepSeek-R1-0528-MXFP4-Preview
model-prefix: dsr1
runner: mi355x-m15-17
precision: fp4
framework: atom
multinode: false
seq-len-configs:
- isl: 1024
osl: 1024
search-space:
- { tp: 4, conc-start: 4, conc-end: 128 }
- { tp: 8, conc-start: 4, conc-end: 128 }
- isl: 1024
osl: 8192
search-space:
- { tp: 8, conc-start: 4, conc-end: 128 }
- isl: 8192
osl: 1024
search-space:
- { tp: 8, conc-start: 4, conc-end: 128 }

dsr1-fp8-mi355x-atom:
image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
model: deepseek-ai/DeepSeek-R1-0528
model-prefix: dsr1
runner: mi355x-m15-17
precision: fp8
framework: atom
multinode: false
seq-len-configs:
- isl: 1024
osl: 1024
search-space:
- { tp: 8, conc-start: 4, conc-end: 128 }
- isl: 1024
osl: 8192
search-space:
- { tp: 8, conc-start: 4, conc-end: 128 }
- isl: 8192
osl: 1024
search-space:
- { tp: 8, conc-start: 4, conc-end: 128 }

gptoss-fp4-mi355x-atom:
image: rocm/atom:rocm7.1.1-ubuntu24.04-pytorch2.9-atom0.1.1-MI350x
model: openai/gpt-oss-120b
model-prefix: gptoss
runner: mi355x-m15-17
precision: fp4
framework: atom
multinode: false
seq-len-configs:
- isl: 1024
osl: 1024
search-space:
- { tp: 1, conc-start: 4, conc-end: 128 }
- { tp: 8, conc-start: 4, conc-end: 128 }
- isl: 1024
osl: 8192
search-space:
- { tp: 1, conc-start: 4, conc-end: 128 }
- { tp: 8, conc-start: 4, conc-end: 128 }
- isl: 8192
osl: 1024
search-space:
- { tp: 1, conc-start: 4, conc-end: 128 }
- { tp: 8, conc-start: 4, conc-end: 128 }
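Each search-space entry above presumably fans out into one benchmark run per concurrency level between conc-start and conc-end. A minimal sketch of that expansion, assuming the sweep doubles concurrency each step (the actual fan-out lives in the workflow, which this diff does not show):

# Hypothetical expansion of { tp: 8, conc-start: 4, conc-end: 128 }
TP=8
CONC=4
while [ "$CONC" -le 128 ]; do
    echo "benchmark run: tp=$TP conc=$CONC"   # 4, 8, 16, 32, 64, 128
    CONC=$(( CONC * 2 ))
done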
3 changes: 3 additions & 0 deletions .github/configs/runners.yaml
@@ -53,5 +53,8 @@ mi355x:
- 'mi355x-amd_1'
- 'mi355x-amd_2'
- 'mi355x-amd_3'
mi355x-m15-17:
- 'mi355x-amdatom'
- 'mi355x-amdatomtw'
gb200:
- gb200-nv_0
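(The new mi355x-m15-17 group is what the atom job configs above reference through their runner: key; presumably each job is dispatched to one of the two listed machines.)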
11 changes: 4 additions & 7 deletions .github/workflows/benchmark-tmpl.yml
@@ -120,13 +120,10 @@ jobs:
           # nvidia-smi --gpu-reset -i 0,1,2,3,4,5,6,7 2>/dev/null || true
         fi
       else
-        echo "[Docker] Cleaning up resources ..."
-        docker ps -aq | xargs -r docker rm -f
-        docker network prune -f
-        while [ -n "$(docker ps -aq)" ]; do
-          docker ps -a
-          sleep 5
-        done
+        echo "[Docker] Cleaning up bmk-server container ..."
+        docker stop bmk-server 2>/dev/null || true
+        docker rm -f bmk-server 2>/dev/null || true
+        docker network rm bmk-net 2>/dev/null || true
       fi
     fi
     if command -v squeue >/dev/null 2>&1; then
24 changes: 24 additions & 0 deletions benchmarks/dsr1_fp4_mi355x_atom_docker.sh
@@ -0,0 +1,24 @@
#!/usr/bin/env bash

# ========= Required Env Vars =========
# HF_TOKEN
# HF_HUB_CACHE
# MODEL
# PORT
# TP
# CONC
# ISL
# OSL
# MAX_MODEL_LEN

# Derive --max-model-len from ISL and OSL: the 1024/1024 case relies on the
# server default, while the longer sweeps (ISL + OSL = 9216 tokens) get an
# explicit 10240-token cap for headroom.
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
    CALCULATED_MAX_MODEL_LEN=""
else
    CALCULATED_MAX_MODEL_LEN=" --max-model-len 10240 "
fi

set -x
python3 -m atom.entrypoints.openai_server \
    --model "$MODEL" \
    --server-port "$PORT" \
    -tp "$TP" \
    --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN  # intentionally unquoted so the optional flag word-splits
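A hypothetical local invocation of the script above, mirroring the env vars the launcher exports into the server container (values are illustrative only):

MODEL=amd/DeepSeek-R1-0528-MXFP4-Preview PORT=8888 TP=8 ISL=1024 OSL=8192 \
    bash benchmarks/dsr1_fp4_mi355x_atom_docker.sh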
27 changes: 27 additions & 0 deletions benchmarks/dsr1_fp8_mi355x_atom_docker.sh
@@ -0,0 +1,27 @@
#!/usr/bin/env bash

# ========= Required Env Vars =========
# HF_TOKEN
# HF_HUB_CACHE
# MODEL
# PORT
# TP
# CONC
# ISL
# OSL
# MAX_MODEL_LEN
# BLOCK_SIZE (optional, defaults to 16)

# Derive --max-model-len from ISL and OSL: the 1024/1024 case relies on the
# server default, while the longer sweeps (ISL + OSL = 9216 tokens) get an
# explicit 10240-token cap for headroom.
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
    CALCULATED_MAX_MODEL_LEN=""
else
    CALCULATED_MAX_MODEL_LEN=" --max-model-len 10240 "
fi

set -x

# KV-cache block size; overridable, defaults to 16
BLOCK_SIZE=${BLOCK_SIZE:-16}
python3 -m atom.entrypoints.openai_server \
    --model "$MODEL" \
    --server-port "$PORT" \
    -tp "$TP" \
    --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN \
    --block-size "$BLOCK_SIZE"
25 changes: 25 additions & 0 deletions benchmarks/gptoss_fp4_mi355x_atom_docker.sh
@@ -0,0 +1,25 @@
#!/usr/bin/env bash

# ========= Required Env Vars =========
# HF_TOKEN
# HF_HUB_CACHE
# MODEL
# PORT
# TP
# CONC
# ISL
# OSL
# MAX_MODEL_LEN

# Derive --max-model-len from ISL and OSL: the 1024/1024 case relies on the
# server default, while the longer sweeps (ISL + OSL = 9216 tokens) get an
# explicit 10240-token cap for headroom.
if [ "$ISL" = "1024" ] && [ "$OSL" = "1024" ]; then
    CALCULATED_MAX_MODEL_LEN=""
else
    CALCULATED_MAX_MODEL_LEN=" --max-model-len 10240 "
fi

set -x
# ROCm perf tuning: keep the HSA runtime from reclaiming scratch memory
export HSA_NO_SCRATCH_RECLAIM=1
python3 -m atom.entrypoints.openai_server \
    --model "$MODEL" \
    --server-port "$PORT" \
    -tp "$TP" \
    --kv_cache_dtype fp8 $CALCULATED_MAX_MODEL_LEN  # intentionally unquoted so the optional flag word-splits
110 changes: 110 additions & 0 deletions runners/launch_mi355x-amdatom.sh
@@ -0,0 +1,110 @@
#!/usr/bin/env bash

# === Workflow-defined Env Vars ===
# IMAGE
# MODEL
# TP
# HF_HUB_CACHE
# ISL
# OSL
# MAX_MODEL_LEN
# RANDOM_RANGE_RATIO
# CONC
# GITHUB_WORKSPACE
# RESULT_FILENAME
# HF_TOKEN
# FRAMEWORK
# EXP_NAME
# PRECISION

HF_HUB_CACHE_MOUNT="/mnt/hf_hub_cache/" # Temporary: hard-coded host path for the HF hub cache
PORT=8888

# Determine framework suffix for benchmark script
FRAMEWORK_SUFFIX=$([[ "$FRAMEWORK" == "atom" ]] && printf '_atom' || printf '')

network_name="bmk-net"
server_name="bmk-server"
client_name="bmk-client"

# Cleanup: stop server container and remove network
docker stop $server_name 2>/dev/null || true
docker rm $server_name 2>/dev/null || true
docker network rm $network_name 2>/dev/null || true

docker network create $network_name

set -x
docker pull "$IMAGE"
DIGEST=$(docker inspect --format='{{index .RepoDigests 0}}' "$IMAGE" | cut -d'@' -f2)
echo "The image digest is: $DIGEST"

docker run --rm -d --ipc=host --shm-size=16g --network=$network_name --name=$server_name \
--privileged --cap-add=CAP_SYS_ADMIN --device=/dev/kfd --device=/dev/dri --device=/dev/mem \
--cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
-v $HF_HUB_CACHE_MOUNT:$HF_HUB_CACHE \
-v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
-e HF_TOKEN -e HF_HUB_CACHE -e MODEL -e TP -e CONC -e MAX_MODEL_LEN -e PORT=$PORT \
-e ISL -e OSL \
--entrypoint=/bin/bash \
$IMAGE \
benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi355x${FRAMEWORK_SUFFIX}_docker.sh"
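The benchmark-script path in the final argument is assembled from EXP_NAME, PRECISION, and FRAMEWORK_SUFFIX; ${EXP_NAME%%_*} keeps only the text before the first underscore. A hypothetical expansion, assuming EXP_NAME=dsr1_fp4 and PRECISION=fp4 (the workflow sets the real values, which this diff does not show):

EXP_NAME=dsr1_fp4; PRECISION=fp4; FRAMEWORK_SUFFIX=_atom
echo benchmarks/"${EXP_NAME%%_*}_${PRECISION}_mi355x${FRAMEWORK_SUFFIX}_docker.sh"
# -> benchmarks/dsr1_fp4_mi355x_atom_docker.sh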

set +x
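# Block until the server logs 'Application startup complete'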
while IFS= read -r line; do
printf '%s\n' "$line"
if [[ "$line" =~ Application\ startup\ complete ]]; then
break
fi
done < <(docker logs -f --tail=0 $server_name 2>&1)

if [[ "$MODEL" == "amd/DeepSeek-R1-0528-MXFP4-Preview" || "$MODEL" == "deepseek-ai/DeepSeek-R1-0528" ]]; then
if [[ "$OSL" == "8192" ]]; then
#NUM_PROMPTS=$(( CONC * 20 ))
NUM_PROMPTS=$(( CONC * 2 )) # atom has no much compilation overhead for dsr1
else
#NUM_PROMPTS=$(( CONC * 50 ))
NUM_PROMPTS=$(( CONC * 10 )) # atom has no much compilation overhead for dsr1
fi
else
if [[ "$OSL" == "8192" ]]; then
NUM_PROMPTS=$(( CONC * 2 ))
else
NUM_PROMPTS=$(( CONC * 10 ))
fi
fi

set -x
echo "$GITHUB_WORKSPACE"
# Fetch the benchmarking client into the workspace (skip if already present)
[ -d "$GITHUB_WORKSPACE/bench_serving" ] || \
    git clone https://github.com/kimbochen/bench_serving.git "$GITHUB_WORKSPACE/bench_serving"

sleep 5

set -x
# Restore '/' in the image ref (it may be passed with '#' as a separator)
CLIENT_IMAGE=$(echo "$IMAGE" | sed 's/#/\//')
docker run --rm --network=$network_name --name=$client_name \
    -v $GITHUB_WORKSPACE:/workspace/ -w /workspace/ \
    -e HF_TOKEN -e PYTHONPYCACHEPREFIX=/tmp/pycache/ \
    --entrypoint=/bin/bash \
    "$CLIENT_IMAGE" \
    -lc "pip install -q datasets pandas && \
        python3 bench_serving/benchmark_serving.py \
            --model=$MODEL --backend=vllm --base-url=http://$server_name:$PORT \
            --dataset-name=random \
            --random-input-len=$ISL --random-output-len=$OSL --random-range-ratio=$RANDOM_RANGE_RATIO \
            --num-prompts=$NUM_PROMPTS \
            --max-concurrency=$CONC \
            --trust-remote-code \
            --request-rate=inf --ignore-eos \
            --save-result --percentile-metrics=ttft,tpot,itl,e2el \
            --result-dir=/workspace/ --result-filename=$RESULT_FILENAME.json"

# GPU core dumps indicate the server crashed at some point during the run
if ls gpucore.* 1> /dev/null 2>&1; then
    echo "Warning: gpucore dump files found; removing them."
    rm -f gpucore.*
fi


# Cleanup: stop server container and remove network
docker stop $server_name 2>/dev/null || true
docker rm $server_name 2>/dev/null || true
docker network rm $network_name 2>/dev/null || true