@@ -88,8 +88,7 @@ def test_connector_simple(enforce_single_worker, model_with_connector,
8888 assert len (scheduler .update_state_after_alloc .call_args .args [1 ]) == 1
8989
9090 # The call count is NUM_TOKENS whether or not the overlap scheduler is used.
91- assert scheduler .build_connector_meta .call_count == NUM_TOKENS + int (
92- use_overlap_scheduler )
91+ assert scheduler .build_connector_meta .call_count == NUM_TOKENS
9392
9493 # We should have a single `SchedulerOutput` per forward pass.
9594 for i , call in enumerate (scheduler .build_connector_meta .call_args_list ):
@@ -109,8 +108,7 @@ def test_connector_simple(enforce_single_worker, model_with_connector,
109108 assert len (scheduler_output .cached_requests [0 ].new_tokens ) == 1
110109
111110 # We call `start_load_kv` once at the beginning of each forward pass.
112- assert worker .start_load_kv .call_count == NUM_TOKENS + int (
113- use_overlap_scheduler )
111+ assert worker .start_load_kv .call_count == NUM_TOKENS
114112
115113 # Only called once when the request is received.
116114 assert scheduler .get_num_new_matched_tokens .call_count == 1
@@ -119,19 +117,16 @@ def test_connector_simple(enforce_single_worker, model_with_connector,
119117 for call in worker .wait_for_layer_load .call_args_list ) + 1
120118
121119 # Called num_layers * num_forward_passes times.
122- assert worker .wait_for_layer_load .call_count == num_layers * (
123- NUM_TOKENS + int (use_overlap_scheduler ))
124- assert worker .save_kv_layer .call_count == num_layers * (
125- NUM_TOKENS + int (use_overlap_scheduler ))
120+ assert worker .wait_for_layer_load .call_count == num_layers * (NUM_TOKENS )
121+ assert worker .save_kv_layer .call_count == num_layers * (NUM_TOKENS )
126122
127123 for i , call in enumerate (worker .wait_for_layer_load .call_args_list ):
128124 assert call .args [0 ] == i % num_layers
129125
130126 for i , call in enumerate (worker .save_kv_layer .call_args_list ):
131127 assert call .args [0 ] == i % num_layers
132128
133- assert worker .wait_for_save .call_count == NUM_TOKENS + int (
134- use_overlap_scheduler )
129+ assert worker .wait_for_save .call_count == NUM_TOKENS
135130
136131 assert scheduler .request_finished .call_count == 1
137132
@@ -238,9 +233,7 @@ def test_connector_scheduler_output(enforce_single_worker, model_with_connector,
238233 scheduler .update_state_after_alloc .call_args .args [1 ]) == math .ceil (
239234 NUM_INPUT_TOKENS / BLOCK_SIZE )
240235
241- # Additional token when using the overlap scheduler.
242- assert scheduler .build_connector_meta .call_count == NUM_TOKENS + int (
243- use_overlap_scheduler )
236+ assert scheduler .build_connector_meta .call_count == NUM_TOKENS
244237
245238 for i , call in enumerate (scheduler .build_connector_meta .call_args_list ):
246239 sched_output = call .args [0 ]
0 commit comments