Skip to content

Commit 83c425f

Browse files
committed
Refactor test_llm_perf_metrics to use context manager for LLM instance
- Updated the test to use a context manager for the LLM instance, improving resource management.
- Maintained the existing assertions to validate performance metrics.

Signed-off-by: Robin Kobus <[email protected]>
1 parent b758c1f commit 83c425f

File tree

1 file changed

+24
-22
lines changed

1 file changed

+24
-22
lines changed

tests/unittest/llmapi/test_llm_pytorch.py

Lines changed: 24 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -180,28 +180,30 @@ def test_llm_reward_model():
180180

181181
@skip_ray
def test_llm_perf_metrics():
    """Check that per-request performance metrics are returned and self-consistent."""
    # The context manager ensures the LLM instance is shut down and its
    # resources released even if an assertion below fails.
    with LLM(model=llama_model_path,
             kv_cache_config=global_kvcache_config) as llm:
        sampling_params = SamplingParams(max_tokens=10,
                                         return_perf_metrics=True)
        results = llm.generate(prompts, sampling_params)
        assert results[0].outputs[0].request_perf_metrics is not None

        metrics = results[0].outputs[0].request_perf_metrics

        # Timing milestones must occur in strictly increasing order:
        # arrival -> first scheduled -> first token -> last token.
        timing = metrics.timing_metrics
        assert timing.arrival_time < timing.first_scheduled_time
        assert timing.first_scheduled_time < timing.first_token_time
        assert timing.first_token_time < timing.last_token_time

        # With a fresh KV cache, exactly one block is newly allocated and
        # nothing is reused, so the hit rate is zero.
        kv_metrics = metrics.kv_cache_metrics
        assert kv_metrics.num_total_allocated_blocks == 1
        assert kv_metrics.num_new_allocated_blocks == 1
        assert kv_metrics.num_reused_blocks == 0
        assert kv_metrics.num_missed_blocks == 1
        assert kv_metrics.kv_cache_hit_rate == 0

        # Iteration counters: the span from first to last iteration matches
        # the requested token budget (max_tokens - 1 steps after the first).
        assert metrics.first_iter is not None
        assert metrics.iter - metrics.first_iter == sampling_params.max_tokens - 1
        assert metrics.last_iter == metrics.iter
205207

206208

207209
@skip_ray

0 commit comments

Comments (0)