Skip to content

Commit 177eb88

Browse files
Superjomn authored and hchings committed
init
Signed-off-by: Superjomn <[email protected]> fix orchestrator_type and llm async example Signed-off-by: Superjomn <[email protected]> refactor rpc_client Signed-off-by: Superjomn <[email protected]> add gc trace in LLM Signed-off-by: Superjomn <[email protected]> .fix cancel future in zmq Signed-off-by: Superjomn <[email protected]> simplify rpc_client await_response Signed-off-by: Superjomn <[email protected]> restore orchestrator_type env Signed-off-by: Superjomn <[email protected]>
1 parent 461915f commit 177eb88

File tree

9 files changed

+312
-207
lines changed

9 files changed

+312
-207
lines changed

tensorrt_llm/_torch/pyexecutor/py_executor.py

Lines changed: 4 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,11 @@
11
import dataclasses
22
import datetime
33
import functools
4-
import gc
54
import os
65
import pickle # nosec B403
76
import threading
87
import time
98
import traceback
10-
import weakref
119
from contextlib import contextmanager
1210
from typing import Dict, Iterable, List, Optional, Tuple, Union
1311

@@ -22,8 +20,9 @@
2220

2321
from tensorrt_llm._torch.pyexecutor.resource_manager import (
2422
ResourceManagerType, request_context)
25-
from tensorrt_llm._utils import (customized_gc_thresholds, is_trace_enabled,
26-
mpi_disabled, nvtx_range, trace_func)
23+
from tensorrt_llm._utils import (customized_gc_thresholds, gc_nvtx_watcher,
24+
is_trace_enabled, mpi_disabled, nvtx_range,
25+
trace_func)
2726
from tensorrt_llm.bindings.executor import (DisServingRequestStats,
2827
FinishReason, InflightBatchingStats,
2928
IterationStats, KvCacheStats,
@@ -59,10 +58,6 @@
5958
# Format: "start1-stop1,start2-stop2,..." or single iterations "iter1,iter2,..."
6059
PROFILE_START_STOP_ENV_VAR_NAME = "TLLM_PROFILE_START_STOP"
6160

62-
# Environment variable to enable garbage collection profiling.
63-
# Set to "1" to enable recording of garbage collection events during profiling.
64-
PROFILE_RECORD_GC_ENV_VAR_NAME = "TLLM_PROFILE_RECORD_GC"
65-
6661
# Environment variable to enable PyTorch profiler tracing.
6762
# Set to a path to save detailed tracing of PyTorch operations.
6863
PROFILE_TRACE_ENV_VAR_NAME = "TLLM_TORCH_PROFILE_TRACE"
@@ -97,40 +92,6 @@ def _load_iteration_indexes(env_var: str):
9792
return frozenset(starts), frozenset(stops)
9893

9994

100-
class _GCNvtxHandle:
101-
pass
102-
103-
104-
def _gc_nvtx_watcher():
105-
enabled = os.environ.get(PROFILE_RECORD_GC_ENV_VAR_NAME, None)
106-
if not enabled:
107-
return None
108-
109-
range_id: Optional[int] = None
110-
111-
def gc_callback(phase, _):
112-
nonlocal range_id
113-
if phase == "start":
114-
assert range_id is None, "Unexpected state in GC callback: another GC while last GC not finished?"
115-
range_id = torch.cuda.nvtx.range_start("Python GC")
116-
elif phase == "stop":
117-
assert range_id is not None, "Unexpected state in GC callback: no active GC but got GC finished?"
118-
torch.cuda.nvtx.range_end(range_id)
119-
range_id = None
120-
121-
gc.callbacks.append(gc_callback)
122-
123-
def gc_cleanup(callback):
124-
try:
125-
gc.callbacks.remove(callback)
126-
except ValueError:
127-
pass
128-
129-
handle = _GCNvtxHandle()
130-
weakref.finalize(handle, gc_cleanup, gc_callback)
131-
return handle
132-
133-
13495
@dataclasses.dataclass
13596
class BatchState:
13697
sample_state: SampleState
@@ -178,7 +139,7 @@ def __init__(self,
178139
# profile config
179140
self.profile_start_iters, self.profile_stop_iters = _load_iteration_indexes(
180141
PROFILE_START_STOP_ENV_VAR_NAME)
181-
self.gc_nvtx_watcher_handle = _gc_nvtx_watcher()
142+
self.gc_nvtx_watcher_handle = gc_nvtx_watcher()
182143

183144
# related modules
184145
self.resource_manager = resource_manager

tensorrt_llm/_utils.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -923,6 +923,18 @@ def nvtx_range_debug(msg: str,
923923
return _null_context_manager()
924924

925925

926+
def nvtx_mark_debug(msg: str,
927+
color: str = "grey",
928+
domain: str = "TensorRT-LLM",
929+
category: Optional[str] = None) -> None:
930+
"""
931+
Creates an NVTX marker for debugging purposes.
932+
"""
933+
if os.getenv("TLLM_LLMAPI_ENABLE_NVTX", "0") == "1" or \
934+
os.getenv("TLLM_NVTX_DEBUG", "0") == "1":
935+
nvtx_mark(msg, color=color, domain=domain, category=category)
936+
937+
926938
def nvtx_mark(msg: str,
927939
color: str = "grey",
928940
domain: str = "TensorRT-LLM",
@@ -1200,3 +1212,50 @@ def is_device_integrated() -> bool:
12001212
if not torch.cuda.is_available():
12011213
return False
12021214
return torch.cuda.get_device_properties().is_integrated
1215+
1216+
1217+
# Environment variable to enable garbage collection profiling.
1218+
# Set to "1" to enable recording of garbage collection events during profiling.
1219+
PROFILE_RECORD_GC_ENV_VAR_NAME = "TLLM_PROFILE_RECORD_GC"
1220+
1221+
1222+
class _GCNvtxHandle:
1223+
"""Handle object for GC NVTX watcher to keep it alive."""
1224+
1225+
1226+
def gc_nvtx_watcher() -> Optional[_GCNvtxHandle]:
1227+
"""
1228+
Set up NVTX range markers for Python garbage collection events.
1229+
This helps in profiling to visualize when GC occurs during execution.
1230+
1231+
Returns:
1232+
_GCNvtxHandle or None: A handle object that keeps the GC callback alive,
1233+
or None if GC profiling is not enabled.
1234+
"""
1235+
enabled = os.environ.get(PROFILE_RECORD_GC_ENV_VAR_NAME, None)
1236+
if not enabled:
1237+
return None
1238+
1239+
range_id: Optional[int] = None
1240+
1241+
def gc_callback(phase, _):
1242+
nonlocal range_id
1243+
if phase == "start":
1244+
assert range_id is None, "Unexpected state in GC callback: another GC while last GC not finished?"
1245+
range_id = torch.cuda.nvtx.range_start("Python GC")
1246+
elif phase == "stop":
1247+
assert range_id is not None, "Unexpected state in GC callback: no active GC but got GC finished?"
1248+
torch.cuda.nvtx.range_end(range_id)
1249+
range_id = None
1250+
1251+
gc.callbacks.append(gc_callback)
1252+
1253+
def gc_cleanup(callback):
1254+
try:
1255+
gc.callbacks.remove(callback)
1256+
except ValueError:
1257+
pass
1258+
1259+
handle = _GCNvtxHandle()
1260+
weakref.finalize(handle, gc_cleanup, gc_callback)
1261+
return handle

tensorrt_llm/bench/benchmark/utils/general.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
from __future__ import annotations
22

33
import json
4-
from importlib.metadata import version
54
from pathlib import Path
65
from random import choices, shuffle
76
from typing import Dict, List, Tuple, Union
@@ -170,7 +169,7 @@ def get_settings(params: dict, dataset_metadata: DatasetMetadata, model: str,
170169

171170
backend = params.get("backend", "pytorch")
172171
return {
173-
"sw_version": version("tensorrt_llm"),
172+
"sw_version": "1.2",
174173
"model_path": model_path,
175174
"settings_config": {
176175
"max_batch_size": int(max_batch_size),

tensorrt_llm/executor/executor.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,7 @@ def _create_ray_executor(
368368
is_llm_executor: bool,
369369
tp_size: int,
370370
):
371+
logger.warning(f"Orchestrator is creating Ray executor")
371372
from .ray_executor import RayExecutor
372373

373374
return RayExecutor(worker_kwargs,
@@ -386,6 +387,7 @@ def _create_rpc_executor(
386387
):
387388
"""Create RPC-based executor (GenerationExecutorRpcProxy)."""
388389
from .rpc_proxy import GenerationExecutorRpcProxy
390+
logger.warning(f"Orchestrator is creating RPC executor")
389391
return GenerationExecutorRpcProxy(
390392
worker_kwargs,
391393
model_world_size=model_world_size,
@@ -408,6 +410,7 @@ def _create_ipc_executor(
408410
use_worker: If True, creates GenerationExecutorWorker (single process).
409411
If False, creates GenerationExecutorProxy (multi-process with IPC).
410412
"""
413+
logger.warning(f"Orchestrator is creating IPC executor")
411414
if use_worker:
412415
from .worker import GenerationExecutorWorker
413416
return GenerationExecutorWorker(**worker_kwargs,

0 commit comments

Comments (0)