@@ -180,28 +180,30 @@ def test_llm_reward_model():
180180
181181@skip_ray
182182def test_llm_perf_metrics ():
183- llm = LLM (model = llama_model_path , kv_cache_config = global_kvcache_config )
184- sampling_params = SamplingParams (max_tokens = 10 , return_perf_metrics = True )
185- outputs = llm .generate (prompts , sampling_params )
186- assert outputs [0 ].outputs [0 ].request_perf_metrics is not None
187-
188- perf_metrics = outputs [0 ].outputs [0 ].request_perf_metrics
189-
190- timing_metrics = perf_metrics .timing_metrics
191- assert timing_metrics .arrival_time < timing_metrics .first_scheduled_time
192- assert timing_metrics .first_scheduled_time < timing_metrics .first_token_time
193- assert timing_metrics .first_token_time < timing_metrics .last_token_time
194-
195- kv_cache_metrics = perf_metrics .kv_cache_metrics
196- assert kv_cache_metrics .num_total_allocated_blocks == 1
197- assert kv_cache_metrics .num_new_allocated_blocks == 1
198- assert kv_cache_metrics .num_reused_blocks == 0
199- assert kv_cache_metrics .num_missed_blocks == 1
200- assert kv_cache_metrics .kv_cache_hit_rate == 0
201-
202- assert perf_metrics .first_iter is not None
203- assert perf_metrics .iter - perf_metrics .first_iter == sampling_params .max_tokens - 1
204- assert perf_metrics .last_iter == perf_metrics .iter
183+ with LLM (model = llama_model_path ,
184+ kv_cache_config = global_kvcache_config ) as llm :
185+ sampling_params = SamplingParams (max_tokens = 10 ,
186+ return_perf_metrics = True )
187+ outputs = llm .generate (prompts , sampling_params )
188+ assert outputs [0 ].outputs [0 ].request_perf_metrics is not None
189+
190+ perf_metrics = outputs [0 ].outputs [0 ].request_perf_metrics
191+
192+ timing_metrics = perf_metrics .timing_metrics
193+ assert timing_metrics .arrival_time < timing_metrics .first_scheduled_time
194+ assert timing_metrics .first_scheduled_time < timing_metrics .first_token_time
195+ assert timing_metrics .first_token_time < timing_metrics .last_token_time
196+
197+ kv_cache_metrics = perf_metrics .kv_cache_metrics
198+ assert kv_cache_metrics .num_total_allocated_blocks == 1
199+ assert kv_cache_metrics .num_new_allocated_blocks == 1
200+ assert kv_cache_metrics .num_reused_blocks == 0
201+ assert kv_cache_metrics .num_missed_blocks == 1
202+ assert kv_cache_metrics .kv_cache_hit_rate == 0
203+
204+ assert perf_metrics .first_iter is not None
205+ assert perf_metrics .iter - perf_metrics .first_iter == sampling_params .max_tokens - 1
206+ assert perf_metrics .last_iter == perf_metrics .iter
205207
206208
207209@skip_ray
0 commit comments