- Start the vLLM server.
- Start aiperf with pynvml GPU telemetry enabled:
aiperf profile \
--model "Qwen/Qwen3-32B" \
--endpoint-type chat \
--streaming \
--url localhost:8000 \
--input-file traces/conversation_trace_15mins.jsonl \
--custom-dataset-type mooncake_trace \
--gpu-telemetry pynvml \
--artifact-dir "result/tp_qwen3_32b"

Got errors:

aiperf profile --model "Qwen/Qwen3-32B" --endpoint-type chat --streaming --url localhost:8000 --input-file traces/conversation_trace_15mins.jsonl --custom-dataset-type mooncake_trace --gpu-telemetry pynvml --artifact-dir "result/tp_qwen3_32b"

Got errors:
