Skip to content

Commit b758c1f

Browse files
committed
[fix] Fix test_llm_api_connector
- Reduce the expected call_count values when using the overlap scheduler.

Signed-off-by: Robin Kobus <[email protected]>
1 parent 621dace commit b758c1f

File tree

1 file changed: +6 -13 lines changed

1 file changed: +6 -13 lines changed

tests/integration/defs/llmapi/test_llm_api_connector.py

Lines changed: 6 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -88,8 +88,7 @@ def test_connector_simple(enforce_single_worker, model_with_connector,
8888
assert len(scheduler.update_state_after_alloc.call_args.args[1]) == 1
8989

9090
# With the overlap scheduler, we generate one extra token.
91-
assert scheduler.build_connector_meta.call_count == NUM_TOKENS + int(
92-
use_overlap_scheduler)
91+
assert scheduler.build_connector_meta.call_count == NUM_TOKENS
9392

9493
# We should have a single `SchedulerOutput` per forward pass.
9594
for i, call in enumerate(scheduler.build_connector_meta.call_args_list):
@@ -109,8 +108,7 @@ def test_connector_simple(enforce_single_worker, model_with_connector,
109108
assert len(scheduler_output.cached_requests[0].new_tokens) == 1
110109

111110
# We call `start_load_kv` once at the beginning of each forward pass.
112-
assert worker.start_load_kv.call_count == NUM_TOKENS + int(
113-
use_overlap_scheduler)
111+
assert worker.start_load_kv.call_count == NUM_TOKENS
114112

115113
# Only called once when the request is received.
116114
assert scheduler.get_num_new_matched_tokens.call_count == 1
@@ -119,19 +117,16 @@ def test_connector_simple(enforce_single_worker, model_with_connector,
119117
for call in worker.wait_for_layer_load.call_args_list) + 1
120118

121119
# Called num_layers * num_forward_passes times.
122-
assert worker.wait_for_layer_load.call_count == num_layers * (
123-
NUM_TOKENS + int(use_overlap_scheduler))
124-
assert worker.save_kv_layer.call_count == num_layers * (
125-
NUM_TOKENS + int(use_overlap_scheduler))
120+
assert worker.wait_for_layer_load.call_count == num_layers * (NUM_TOKENS)
121+
assert worker.save_kv_layer.call_count == num_layers * (NUM_TOKENS)
126122

127123
for i, call in enumerate(worker.wait_for_layer_load.call_args_list):
128124
assert call.args[0] == i % num_layers
129125

130126
for i, call in enumerate(worker.save_kv_layer.call_args_list):
131127
assert call.args[0] == i % num_layers
132128

133-
assert worker.wait_for_save.call_count == NUM_TOKENS + int(
134-
use_overlap_scheduler)
129+
assert worker.wait_for_save.call_count == NUM_TOKENS
135130

136131
assert scheduler.request_finished.call_count == 1
137132

@@ -238,9 +233,7 @@ def test_connector_scheduler_output(enforce_single_worker, model_with_connector,
238233
scheduler.update_state_after_alloc.call_args.args[1]) == math.ceil(
239234
NUM_INPUT_TOKENS / BLOCK_SIZE)
240235

241-
# Additional token when using the overlap scheduler.
242-
assert scheduler.build_connector_meta.call_count == NUM_TOKENS + int(
243-
use_overlap_scheduler)
236+
assert scheduler.build_connector_meta.call_count == NUM_TOKENS
244237

245238
for i, call in enumerate(scheduler.build_connector_meta.call_args_list):
246239
sched_output = call.args[0]

0 commit comments

Comments (0)