Skip to content

Commit a524027

Browse files
committed
add rpc test list
Signed-off-by: chunweiy <[email protected]> Signed-off-by: Superjomn <[email protected]>
1 parent 3e84bb4 commit a524027

File tree

6 files changed

+102
-1
lines changed

6 files changed

+102
-1
lines changed
Lines changed: 85 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,85 @@
1+
# A Lightweight RPC
2+
This is a pure-Python lightweight RPC we build to simplify our existing IPC code in the orchestrator part. It provides multiple call modes (sync, async, future, streaming) and supports both IPC and TCP connections.
3+
4+
## Examples
5+
### Create Server and Client
6+
7+
```python
8+
from tensorrt_llm.executor.rpc import RPCServer, RPCClient
9+
10+
# Define your application
11+
class App:
12+
def add(self, a: int, b: int) -> int:
13+
return a + b
14+
15+
async def async_multiply(self, x: int, y: int) -> int:
16+
return x * y
17+
18+
# Create and start server
19+
app = App()
20+
with RPCServer(app) as server:
21+
server.bind("ipc:///tmp/my_rpc") # or "tcp://127.0.0.1:5555"
22+
server.start()
23+
24+
# Create client and make calls
25+
with RPCClient("ipc:///tmp/my_rpc") as client:
26+
result = client.add(5, 3).remote()
27+
print(result) # Output: 8
28+
```
29+
30+
### Different Remote Calls
31+
32+
#### Synchronous Call
33+
```python
34+
# Blocking call that waits for result
35+
result = client.add(10, 20).remote()
36+
# or with timeout
37+
result = client.add(10, 20).remote(timeout=5.0)
38+
```
39+
40+
#### Asynchronous Call
41+
```python
42+
# Async call that returns a coroutine
43+
result = await client.async_multiply(3, 4).remote_async()
44+
```
45+
46+
#### Future-based Call
47+
```python
48+
# Returns a concurrent.futures.Future
49+
future = client.add(1, 2).remote_future()
50+
# Get result later
51+
result = future.result()
52+
```
53+
54+
#### Fire-and-Forget Call
55+
```python
56+
# Send request without waiting for response
57+
client.submit_task(task_id=123).remote(need_response=False)
58+
```
59+
60+
#### Streaming Call
61+
```python
62+
# For async generator methods
63+
async for value in client.stream_data(n=10).remote_streaming():
64+
print(f"Received: {value}")
65+
```
66+
67+
### Error Handling
68+
```python
69+
from tensorrt_llm.executor.rpc import RPCError, RPCTimeout
70+
71+
try:
72+
result = client.risky_operation().remote(timeout=1.0)
73+
except RPCTimeout:
74+
print("Operation timed out")
75+
except RPCError as e:
76+
print(f"RPC Error: {e}")
77+
print(f"Original cause: {e.cause}")
78+
print(f"Traceback: {e.traceback}")
79+
```
80+
81+
### Graceful Shutdown
82+
```python
83+
# Shutdown server from client
84+
client.shutdown_server()
85+
```

tests/integration/test_lists/test-db/l0_a10.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ l0_a10:
4646
- unittest/llmapi/test_serialization.py
4747
- unittest/llmapi/test_utils.py
4848
- unittest/llmapi/test_llm_args.py
49+
# executor
50+
- unittest/executor/test_rpc.py
4951
- condition:
5052
ranges:
5153
system_gpu_count:

tests/integration/test_lists/test-db/l0_a100.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,10 @@ l0_a100:
1616
- unittest/llmapi/test_llm_pytorch.py
1717
- unittest/llmapi/test_mpi_session.py # generic tests
1818
- unittest/trt/model_api/test_model_quantization.py
19+
# executor
20+
- unittest/executor/test_base_worker.py
21+
- unittest/executor/test_rpc_proxy.py
22+
- unittest/executor/test_rpc_worker.py
1923
- condition:
2024
ranges:
2125
system_gpu_count:

tests/unittest/executor/test_base_worker.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# isort: off
1313
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/..")
1414
from utils.llm_data import llm_models_root
15+
from utils.util import skip_single_gpu
1516
# isort: on
1617

1718
from tensorrt_llm._torch.pyexecutor.config import update_executor_config
@@ -156,6 +157,8 @@ def create_worker_session(self):
156157
session = MpiPoolSession(n_workers=2)
157158
return session
158159

160+
@pytest.mark.gpu2
161+
@skip_single_gpu
159162
def test_create_executor(self):
160163
futures = self.session.submit(
161164
TestRpcWorkerBaseTP2.create_executor,

tests/unittest/executor/test_rpc_proxy.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
# isort: off
1515
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/..")
1616
from utils.llm_data import llm_models_root
17-
from utils.util import similar
17+
from utils.util import similar, skip_single_gpu
1818
# isort: on
1919

2020
model_path = llm_models_root() / "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
@@ -78,6 +78,8 @@ def test_tp1(self, num_reqs):
7878
assert isinstance(kv_cache_events, list)
7979

8080
@pytest.mark.parametrize("num_reqs", [1, 10])
81+
@skip_single_gpu
82+
@pytest.mark.gpu2
8183
def test_tp2(self, num_reqs):
8284
tokenizer = TransformersTokenizer.from_pretrained(model_path)
8385
prompt = "A B C D"

tests/unittest/executor/test_rpc_worker.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
# isort: off
1919
sys.path.append(os.path.dirname(os.path.abspath(__file__)) + "/..")
2020
from utils.llm_data import llm_models_root
21+
from utils.util import skip_single_gpu
2122
# isort: on
2223

2324
model_path = llm_models_root() / "llama-models-v2/TinyLlama-1.1B-Chat-v1.0"
@@ -215,11 +216,15 @@ def create_worker_session(self):
215216
def create_rpc_client(self, addr: str):
216217
return RPCClient(addr)
217218

219+
@skip_single_gpu
220+
@pytest.mark.gpu2
218221
def test_create_shutdown(self):
219222
# Invoke setup_engine in rank 0, and that will unblock all the ranks to
220223
# invoke setup_engine simultaneously.
221224
pass
222225

226+
@skip_single_gpu
227+
@pytest.mark.gpu2
223228
def test_fetch_responses_sync(self):
224229
# Wait a bit to ensure engine is ready
225230
time.sleep(1)

0 commit comments

Comments
 (0)