
Commit 8a751a0

[None][chore] Remove is_disaggregated param in executor request queue (#9049)
Signed-off-by: Patrice Castonguay <[email protected]>
1 parent 780d4f9 commit 8a751a0

3 files changed: +7 −16 lines changed

tensorrt_llm/_torch/pyexecutor/executor_request_queue.py

Lines changed: 1 addition & 2 deletions
@@ -50,7 +50,7 @@ class ExecutorRequestQueue:
     def __init__(self, dist: Distributed, enable_attention_dp: bool,
                  max_batch_size: int, max_beam_width: int,
                  max_num_active_requests: int, enable_iter_perf_stats: bool,
-                 batch_wait_timeout_ms: float, is_disaggregated: bool):
+                 batch_wait_timeout_ms: float):
         self.dist = dist
         self.request_queue: queue.Queue[RequestQueueItem] = queue.Queue()
         self.waiting_queue: deque[RequestQueueItem] = deque()
@@ -59,7 +59,6 @@ def __init__(self, dist: Distributed, enable_attention_dp: bool,
         self.max_batch_size = max_batch_size
         self.max_beam_width = max_beam_width
         self.max_num_active_requests = max_num_active_requests
-        self.is_disaggregated = is_disaggregated
         self.enqueue_lock = threading.Lock()
         self.next_request_id = max_batch_size
         self.enable_iter_perf_stats = enable_iter_perf_stats
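After this change the queue is constructed without the removed flag. The following is a minimal sketch that mirrors the updated unit-test fixture rather than the production call site; the MagicMock stands in for a real Distributed object, and the argument values are illustrative, assuming the module is importable from an installed tensorrt_llm build:

from unittest.mock import MagicMock

from tensorrt_llm._torch.pyexecutor.executor_request_queue import ExecutorRequestQueue

# Placeholder for a real Distributed instance, as done in the unit tests.
mock_dist = MagicMock()

executor_queue = ExecutorRequestQueue(dist=mock_dist,
                                      enable_attention_dp=False,
                                      max_batch_size=8,
                                      max_beam_width=1,
                                      max_num_active_requests=16,
                                      enable_iter_perf_stats=True,
                                      batch_wait_timeout_ms=0.0)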

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 0 additions & 1 deletion
@@ -254,7 +254,6 @@ def __init__(self,
             max_num_active_requests=self.max_num_active_requests,
             enable_iter_perf_stats=self.enable_iter_perf_stats,
             batch_wait_timeout_ms=self.batch_wait_timeout_ms,
-            is_disaggregated=kv_cache_transceiver is not None,
         )
         self.executor_request_queue.set_exclude_last_generation_logits(
             self.disable_overlap_scheduler, self.dist.pp_size)

tests/unittest/_torch/executor/test_executor_request_queue.py

Lines changed: 6 additions & 13 deletions
@@ -42,8 +42,7 @@ def executor_queue(mock_dist):
         max_beam_width=1,
         max_num_active_requests=16,
         enable_iter_perf_stats=True,
-        batch_wait_timeout_ms=0.0,
-        is_disaggregated=False)
+        batch_wait_timeout_ms=0.0)


 @pytest.fixture
@@ -55,8 +54,7 @@ def integration_queue(mock_dist):
         max_beam_width=2,
         max_num_active_requests=8,
         enable_iter_perf_stats=True,
-        batch_wait_timeout_ms=0.0,
-        is_disaggregated=False)
+        batch_wait_timeout_ms=0.0)


 def test_executor_queue_init(executor_queue, mock_dist):
@@ -65,7 +63,6 @@ def test_executor_queue_init(executor_queue, mock_dist):
     assert not executor_queue.enable_attention_dp
     assert executor_queue.max_beam_width == 1
     assert executor_queue.max_num_active_requests == 16
-    assert not executor_queue.is_disaggregated
     assert executor_queue.next_request_id == 8
     assert executor_queue.enable_iter_perf_stats
     assert executor_queue.active
@@ -124,8 +121,7 @@ def test_merge_helix_requests_with_padding(mock_dist):
         max_beam_width=1,
         max_num_active_requests=16,
         enable_iter_perf_stats=True,
-        batch_wait_timeout_ms=0.0,
-        is_disaggregated=True)
+        batch_wait_timeout_ms=0.0)

     # Mock _should_exclude_last_generation_logits.
     with patch.object(executor_queue,
@@ -181,8 +177,7 @@ def test_merge_helix_requests_without_padding(mock_dist):
         max_beam_width=1,
         max_num_active_requests=16,
         enable_iter_perf_stats=True,
-        batch_wait_timeout_ms=0.0,
-        is_disaggregated=True)
+        batch_wait_timeout_ms=0.0)

     # Mock _should_exclude_last_generation_logits.
     with patch.object(executor_queue,
@@ -235,8 +230,7 @@ def test_merge_helix_requests_insufficient_blocks_error(mock_dist):
         max_beam_width=1,
         max_num_active_requests=16,
         enable_iter_perf_stats=True,
-        batch_wait_timeout_ms=0.0,
-        is_disaggregated=True)
+        batch_wait_timeout_ms=0.0)

     with pytest.raises(
         ValueError,
@@ -598,8 +592,7 @@ def attention_dp_queue(mock_dist_attention_dp):
         max_beam_width=2,
         max_num_active_requests=8,
         enable_iter_perf_stats=True,
-        batch_wait_timeout_ms=0.0,
-        is_disaggregated=False)
+        batch_wait_timeout_ms=0.0)
     # Initialize all_ranks_num_active_requests
     return queue