Commit f430a4b

Authored by kaiyux, IbrahimAmin1, fjosw, Pzzzzz5142, and hemant-co
Update TensorRT-LLM (NVIDIA#1688)
* Update TensorRT-LLM

---------

Co-authored-by: IbrahimAmin <[email protected]>
Co-authored-by: Fabian Joswig <[email protected]>
Co-authored-by: Pzzzzz <[email protected]>
Co-authored-by: CoderHam <[email protected]>
Co-authored-by: Konstantin Lopuhin <[email protected]>
1 parent 5d8ca2f commit f430a4b

File tree

529 files changed, +1,163,937 -9,114 lines


.gitignore
-1

@@ -8,7 +8,6 @@ __pycache__/
 build*/
 *.egg-info/
 .coverage
-*.csv
 *.onnx
 tmp/
 venv/

CHANGELOG.md
-210

This file was deleted.

README.md
+2 -2

@@ -6,9 +6,9 @@ TensorRT-LLM
 
 [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://nvidia.github.io/TensorRT-LLM/)
 [![python](https://img.shields.io/badge/python-3.10.12-green)](https://www.python.org/downloads/release/python-31012/)
-[![cuda](https://img.shields.io/badge/cuda-12.4.0-green)](https://developer.nvidia.com/cuda-downloads)
+[![cuda](https://img.shields.io/badge/cuda-12.4.1-green)](https://developer.nvidia.com/cuda-downloads)
 [![trt](https://img.shields.io/badge/TRT-10.0.1-green)](https://developer.nvidia.com/tensorrt)
-[![version](https://img.shields.io/badge/release-0.10.0.dev-green)](./setup.py)
+[![version](https://img.shields.io/badge/release-0.11.0.dev-green)](./tensorrt_llm/version.py)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)
 
 [Architecture](./docs/source/architecture/overview.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Results](./docs/source/performance/perf-overview.md)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Examples](./examples/)&nbsp;&nbsp;&nbsp;|&nbsp;&nbsp;&nbsp;[Documentation](./docs/source/)

benchmarks/cpp/gptManagerBenchmark.cpp
+1 -1

@@ -906,7 +906,7 @@ class GptServer
             [this](uint64_t requestId, std::list<NamedTensor> const& response_tensors, bool final_response,
                 std::string const& errMsg)
             { return sendResponse(requestId, response_tensors, final_response, errMsg); },
-            nullptr, iterationDataCallback, optionalParams, terminateReqId, std::nullopt, excludeInputInOutput);
+            nullptr, iterationDataCallback, optionalParams, terminateReqId, excludeInputInOutput);
    }
 
    ~GptServer()

benchmarks/cpp/prepare_dataset.py
+11 -1

@@ -18,6 +18,8 @@
 import click
 from pydantic import BaseModel, field_validator
 from transformers import AutoTokenizer
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.tokenization_utils_fast import PreTrainedTokenizerFast
 from utils.prepare_real_data import dataset
 from utils.prepare_synthetic_data import token_norm_dist
 
@@ -27,10 +29,12 @@ class RootArgs(BaseModel):
     output: str
     random_seed: int
     task_id: int
+    std_out: bool
     rand_task_id: Optional[Tuple[int, int]]
 
     @field_validator('tokenizer')
-    def get_tokenizer(cls, v: str):
+    def get_tokenizer(cls,
+                      v: str) -> PreTrainedTokenizer | PreTrainedTokenizerFast:
         try:
             tokenizer = AutoTokenizer.from_pretrained(v, padding_side='left')
         except EnvironmentError as e:
@@ -53,6 +57,11 @@ def get_tokenizer(cls, v: str):
               type=str,
              help="Output json filename.",
              default="preprocessed_dataset.json")
+@click.option(
+    "--stdout",
+    is_flag=True,
+    help="Print output to stdout with a JSON dataset entry on each line.",
+    default=False)
 @click.option("--random-seed",
              required=False,
              type=int,
@@ -80,6 +89,7 @@ def cli(ctx, **kwargs):
 
     ctx.obj = RootArgs(tokenizer=kwargs['tokenizer'],
                        output=kwargs['output'],
+                       std_out=kwargs['stdout'],
                        random_seed=kwargs['random_seed'],
                        task_id=kwargs['task_id'],
                        rand_task_id=kwargs['rand_task_id'])
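The changes above follow a small pattern: click parses a boolean --stdout flag, and the pydantic RootArgs model carries it to the subcommands as the std_out field. Note also that the new return annotation PreTrainedTokenizer | PreTrainedTokenizerFast uses PEP 604 union syntax, which requires Python 3.10+, consistent with the python-3.10.12 badge in the README. A minimal, self-contained sketch of the flag-to-model pattern (the sample payload is invented; the real script threads RootArgs through ctx.obj into its dataset subcommands):

import json

import click
from pydantic import BaseModel


class RootArgs(BaseModel):
    output: str
    std_out: bool


@click.command()
@click.option("--output",
              type=str,
              help="Output json filename.",
              default="preprocessed_dataset.json")
@click.option("--stdout",
              is_flag=True,
              help="Print output to stdout with a JSON dataset entry on each line.",
              default=False)
def cli(output, stdout):
    # As in the commit, the CLI flag is spelled --stdout but the model
    # field is std_out; the rename happens when RootArgs is constructed.
    args = RootArgs(output=output, std_out=stdout)
    samples = [{"task_id": 0, "input_ids": [1, 2, 3]}]  # invented sample
    if args.std_out:
        for sample in samples:
            print(json.dumps(sample, separators=(',', ':')))
    else:
        with open(args.output, 'w') as f:
            json.dump(samples, f)


if __name__ == "__main__":
    cli()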

benchmarks/cpp/utils/prepare_real_data.py
+17 -9

@@ -6,7 +6,7 @@
 import click
 from datasets import load_dataset
 from pydantic import BaseModel, model_validator
-from utils.utils import dataset_dump, get_norm_dist_tokens
+from utils.utils import dataset_dump, get_norm_dist_tokens, print_dataset
 
 
 def validate_output_len_dist(ctx, param, value):
@@ -220,11 +220,19 @@ def dataset(root_args, **kwargs):
     logging.debug(f"Input lengths: {[len(i) for i in input_ids]}")
     logging.debug(f"Output lengths: {output_lens}")
 
-    dataset_dump(
-        input_lens, input_ids, output_lens, task_ids, {
-            "workload_type": "dataset",
-            "tokenizer": root_args.tokenizer.__class__.__name__,
-            "num_requests": len(input_ids),
-            "max_input_len": max(input_lens),
-            "max_output_len": max(output_lens)
-        }, root_args.output)
+    if not root_args.std_out:
+        dataset_dump(
+            input_lens, input_ids, output_lens, task_ids, {
+                "workload_type": "dataset",
+                "tokenizer": root_args.tokenizer.__class__.__name__,
+                "num_requests": len(input_ids),
+                "max_input_len": max(input_lens),
+                "max_output_len": max(output_lens)
+            }, root_args.output)
+    else:
+        print_dataset(
+            task_ids,
+            input_ids,
+            output_lens,
+            tokenizer=None,
+        )
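One detail worth flagging: this call site passes task_ids first and a tokenizer=None keyword, while the print_dataset added to utils.py below is defined as print_dataset(input_ids, output_lens). A sketch of a variant that would match this call shape (hypothetical, not part of the commit):

import json


def print_dataset(task_ids, input_ids, output_lens, tokenizer=None):
    # Hypothetical variant matching the call in prepare_real_data.py:
    # task_ids come from the dataset rather than from enumerate(), and
    # tokenizer is accepted but unused (the caller passes tokenizer=None).
    for task_id, input_tokens, output_len in zip(task_ids, input_ids,
                                                 output_lens):
        d = {
            "task_id": task_id,
            "logits": input_tokens,
            "output_tokens": output_len
        }
        print(json.dumps(d, separators=(',', ':'), ensure_ascii=False))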

benchmarks/cpp/utils/prepare_synthetic_data.py
+20 -13

@@ -1,7 +1,8 @@
 import random
 
 import click
-from utils.utils import dataset_dump, gen_random_tokens, get_norm_dist_tokens
+from utils.utils import (dataset_dump, gen_random_tokens, get_norm_dist_tokens,
+                         print_dataset)
 
 
 @click.command()
@@ -55,15 +56,21 @@ def token_norm_dist(root_args, **kwargs):
     min_id, max_id = root_args.rand_task_id
     task_ids = [random.randint(min_id, max_id) for _ in range(num_reqs)]
 
-    dataset_dump(
-        input_lens, input_ids, output_lens, task_ids, {
-            "workload_type": "token-norm-dist",
-            "input_mean": kwargs['input_mean'],
-            "input_stdev": kwargs['input_stdev'],
-            "output_mean": kwargs['output_mean'],
-            "output_stdev": kwargs['output_stdev'],
-            "num_requests": kwargs['num_requests'],
-            "tokenize_vocabsize": root_args.tokenizer.vocab_size,
-            "max_input_len": max_input_len,
-            "max_output_len": max_output_len
-        }, root_args.output)
+    if not root_args.std_out:
+        dataset_dump(
+            input_lens, input_ids, output_lens, task_ids, {
+                "workload_type": "token-norm-dist",
+                "input_mean": kwargs['input_mean'],
+                "input_stdev": kwargs['input_stdev'],
+                "output_mean": kwargs['output_mean'],
+                "output_stdev": kwargs['output_stdev'],
+                "num_requests": kwargs['num_requests'],
+                "tokenize_vocabsize": root_args.tokenizer.vocab_size,
+                "max_input_len": max_input_len,
+                "max_output_len": max_output_len
+            }, root_args.output)
+    else:
+        print_dataset(
+            input_ids,
+            output_lens,
+        )

benchmarks/cpp/utils/utils.py
+11 -1

@@ -43,7 +43,17 @@ def dataset_dump(input_lens, input_ids, output_lens, task_ids, metadata,
                             task_id=task_ids[i]))
     workload = Workload(metadata=metadata, samples=samples)
     with open(output_file, 'w') as f:
-        json.dump(workload.dict(), f)
+        json.dump(workload.model_dump(), f)
+
+
+def print_dataset(input_ids, output_lens):
+    for i, input_tokens in enumerate(input_ids):
+        d = {
+            "task_id": i,
+            "logits": input_tokens,
+            "output_tokens": output_lens[i]
+        }
+        print(json.dumps(d, separators=(',', ':'), ensure_ascii=False))
 
 
 def get_list_of_delays(delay_dist, mean_time_bet_reqs, num_reqs, random_seed):
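Two things change in utils.py: dataset_dump swaps pydantic v1's .dict() for the v2 equivalent .model_dump(), and the new print_dataset emits one compact JSON object per line (JSON Lines), which is what the --stdout path writes. A runnable sketch of the output format, using invented token ids:

import json


def print_dataset(input_ids, output_lens):
    # Same body as the function added above: one compact JSON object
    # per line, with compact separators and non-ASCII left unescaped.
    for i, input_tokens in enumerate(input_ids):
        d = {
            "task_id": i,
            "logits": input_tokens,
            "output_tokens": output_lens[i]
        }
        print(json.dumps(d, separators=(',', ':'), ensure_ascii=False))


print_dataset([[101, 2023, 102], [101, 7592, 102]], [32, 64])
# prints:
# {"task_id":0,"logits":[101,2023,102],"output_tokens":32}
# {"task_id":1,"logits":[101,7592,102],"output_tokens":64}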
