Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
[]
Empty file.
212 changes: 212 additions & 0 deletions byte_infer_perf/general_perf/backends/ILUVATAR/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,212 @@
import random
import torch
import time
import ctypes
import argparse
import numpy as np
from os.path import join, dirname, exists

import tensorrt
from tensorrt import Dims
import pycuda.driver as cuda
from cuda import cuda,cudart
from datasets import load_dataset
from torch.utils.data import SequentialSampler, DataLoader
from transformers import DataCollatorForLanguageModeling, BertTokenizer


def setup_seed(seed):
    """Seed every random number generator this module relies on.

    Seeds Python's ``random``, NumPy, and PyTorch (CPU and all CUDA devices)
    with the same value and switches cuDNN to deterministic mode.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python and NumPy generators first, then the PyTorch ones; the calls are
    # independent of each other so their order does not matter.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True


def load_ixrt_plugin(logger=tensorrt.Logger(tensorrt.Logger.INFO), namespace="", dynamic_path=""):
    """Load the ixrt plugin shared library and register its plugins.

    Args:
        logger: Logger handed to ``tensorrt.init_libnvinfer_plugins``.
        namespace: Plugin namespace handed to ``init_libnvinfer_plugins``.
        dynamic_path: Path of the plugin library. When empty, falls back to
            ``lib/libixrt_plugin.so`` next to the installed ``tensorrt`` package.

    Raises:
        FileNotFoundError: If the resolved library path does not exist.
    """
    plugin_path = dynamic_path or join(dirname(tensorrt.__file__), "lib", "libixrt_plugin.so")

    if not exists(plugin_path):
        raise FileNotFoundError(
            f"The ixrt_plugin lib {plugin_path} is not existed, please provided effective plugin path!")

    # RTLD_GLOBAL exposes the library's symbols to subsequently loaded libraries.
    ctypes.CDLL(plugin_path, mode=ctypes.RTLD_GLOBAL)
    tensorrt.init_libnvinfer_plugins(logger, namespace)
    print(f"Loaded plugin from {plugin_path}")


def build_engine(model_name, onnx_model_path, engine_path, MaxBatchSize):
    """Build an FP16 dynamic-shape engine from an ONNX model and save it.

    An optimization profile and the network input shapes are configured per
    ``model_name``; unrecognised names get an empty profile and keep the input
    shapes produced by the ONNX parser.

    Args:
        model_name: ``'resnet50'``, ``'yolov5'``, ``'bert'`` or ``'widedeep'``.
        onnx_model_path: Path of the ONNX model to parse.
        engine_path: Destination file for the serialized engine.
        MaxBatchSize: Batch size used for the ``bert`` profile maximum and for
            every ``widedeep`` profile dimension.

    Raises:
        RuntimeError: If the ONNX model cannot be parsed or the engine build
            returns no serialized plan.
    """
    IXRT_LOGGER = tensorrt.Logger(tensorrt.Logger.WARNING)
    builder = tensorrt.Builder(IXRT_LOGGER)
    EXPLICIT_BATCH = 1 << int(tensorrt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(EXPLICIT_BATCH)
    build_config = builder.create_builder_config()

    profile = builder.create_optimization_profile()

    if model_name == 'resnet50':
        # NOTE(review): the batch range is fixed at 1/32/64 and ignores
        # MaxBatchSize -- confirm this is intended.
        profile.set_shape(
            "input", Dims([1, 3, 224, 224]), Dims([32, 3, 224, 224]), Dims([64, 3, 224, 224]))

    elif model_name == 'yolov5':
        # NOTE(review): same fixed 1/32/64 batch range as resnet50.
        profile.set_shape(
            "images", Dims([1, 3, 640, 640]), Dims([32, 3, 640, 640]), Dims([64, 3, 640, 640]))

    elif model_name == 'bert':
        for input_name in ("input_ids.1", "attention_mask.1", "token_type_ids.1"):
            profile.set_shape(
                input_name, Dims([1, 384]), Dims([16, 384]), Dims([MaxBatchSize, 384]))

    elif model_name == 'widedeep':
        # min == opt == max: each widedeep engine is pinned to one batch size.
        widedeep_shapes = (
            ("new_numeric_placeholder:0", [MaxBatchSize, 13]),
            ("new_categorical_placeholder:0", [MaxBatchSize * 26, 2]),
            ("import/head/predictions/zeros_like:0", [MaxBatchSize, 1]),
        )
        for input_name, dims in widedeep_shapes:
            profile.set_shape(input_name, Dims(dims), Dims(dims), Dims(dims))

    build_config.add_optimization_profile(profile)

    parser = tensorrt.OnnxParser(network, IXRT_LOGGER)
    # A failed parse leaves the network incomplete; stop here instead of
    # attempting to build from it.
    if not parser.parse_from_file(onnx_model_path):
        raise RuntimeError(f"Failed to parse ONNX model: {onnx_model_path}")
    build_config.set_flag(tensorrt.BuilderFlag.FP16)

    # Mark the batch dimension of every network input as dynamic.
    for i in range(network.num_inputs):
        input_tensor = network.get_input(i)

        if model_name == 'resnet50':
            input_tensor.shape = Dims([-1, 3, 224, 224])

        elif model_name == 'yolov5':
            input_tensor.shape = Dims([-1, 3, 640, 640])
            input_tensor.dtype = tensorrt.float16

        elif model_name == 'bert':
            input_tensor.shape = Dims([-1, 384])

        elif model_name == 'widedeep':
            if i == 0:
                # NOTE(review): -26 is kept from the original code; it appears
                # to pair with the MaxBatchSize * 26 profile dimension --
                # confirm the runtime accepts a dynamic marker other than -1.
                input_tensor.shape = Dims([-26, 2])
            elif i == 1:
                input_tensor.shape = Dims([-1, 13])
            else:
                input_tensor.shape = Dims([-1, 1])

    plan = builder.build_serialized_network(network, build_config)
    # Check before opening the file so a failed build does not leave an empty
    # engine file behind.
    if plan is None:
        raise RuntimeError(
            f"Failed to build engine for model '{model_name}' from {onnx_model_path}")

    with open(engine_path, "wb") as f:
        f.write(plan)

    print("Build dynamic shape engine done!")


def init_by_tensorrt(engine_path):
    """Deserialize an engine file and create an execution context for it.

    Args:
        engine_path: Path of the serialized engine file.

    Returns:
        Tuple ``(engine, context)``.

    Raises:
        RuntimeError: If the runtime, engine or execution context cannot be
            created.
    """
    logger = tensorrt.Logger(tensorrt.Logger.ERROR)

    with open(engine_path, "rb") as f:
        serialized_engine = f.read()

    # A single runtime is sufficient. It is deliberately not wrapped in a
    # ``with`` block: the engine returned to the caller was deserialized by it.
    runtime = tensorrt.Runtime(logger)
    if not runtime:
        raise RuntimeError("Failed to create the inference runtime")

    engine = runtime.deserialize_cuda_engine(serialized_engine)
    if not engine:
        raise RuntimeError(f"Failed to deserialize engine from {engine_path}")

    context = engine.create_execution_context()
    if not context:
        raise RuntimeError(f"Failed to create an execution context for {engine_path}")

    return engine, context


def setup_io_bindings(engine, context):
    """Allocate one device buffer per engine binding.

    Buffer sizes come from the shapes currently reported by ``context``, so any
    dynamic input shape must be set on the context before calling this.

    Args:
        engine: Engine queried for binding names, dtypes and input/output role.
        context: Execution context queried for the binding shapes.

    Returns:
        Tuple ``(inputs, outputs, allocations)``. ``inputs`` and ``outputs``
        are lists of dicts with keys ``index``, ``name``, ``dtype``, ``shape``,
        ``allocation`` and ``nbytes``; ``allocations`` holds the device
        pointers of all bindings in binding order.

    Raises:
        ValueError: If a binding still has a negative (unresolved) dimension.
        RuntimeError: If a device allocation fails.
    """
    inputs = []
    outputs = []
    allocations = []

    try:
        for i in range(engine.num_bindings):
            name = engine.get_binding_name(i)
            np_dtype = np.dtype(tensorrt.nptype(engine.get_binding_dtype(i)))
            shape = list(context.get_binding_shape(i))

            if any(dim < 0 for dim in shape):
                raise ValueError(
                    f"Binding '{name}' has unresolved shape {shape}; "
                    "set the input binding shapes before allocating buffers")

            nbytes = np_dtype.itemsize
            for dim in shape:
                nbytes *= dim

            err, allocation = cudart.cudaMalloc(nbytes)
            if err != cudart.cudaError_t.cudaSuccess:
                raise RuntimeError(
                    f"cudaMalloc of {nbytes} bytes for binding '{name}' failed: {err}")
            allocations.append(allocation)

            binding = {
                "index": i,
                "name": name,
                "dtype": np_dtype,
                "shape": shape,
                "allocation": allocation,
                "nbytes": nbytes
            }

            if engine.binding_is_input(i):
                inputs.append(binding)
            else:
                outputs.append(binding)
    except Exception:
        # Do not leak the buffers that were already allocated.
        for device_ptr in allocations:
            cudart.cudaFree(device_ptr)
        raise

    return inputs, outputs, allocations


def tensorrt_infer_dynamic(engine, context, input_ids, token_type_ids):
    """Run one inference with dynamic input shapes and time the execution.

    The binding shapes of ``input_ids`` and ``token_type_ids`` are set from the
    given arrays, device buffers are allocated, the inputs are copied to the
    device, the network is executed and the first output is copied back.

    Args:
        engine: Engine used to look up binding indices.
        context: Execution context the engine is run with.
        input_ids: Host array copied into the first input binding.
        token_type_ids: Host array copied into the second input binding.

    Returns:
        Tuple ``(output, time_each)``: the first output as a NumPy array and
        the wall-clock seconds spent in ``execute_v2`` between two
        ``torch.cuda.synchronize()`` calls.

    Raises:
        ValueError: If an input holds more bytes than its device buffer.
        RuntimeError: If a host/device copy fails.
    """
    # Raw byte copies need contiguous host memory.
    host_inputs = {
        "input_ids": np.ascontiguousarray(input_ids),
        "token_type_ids": np.ascontiguousarray(token_type_ids),
    }

    # set dynamic shape
    for input_name, host_array in host_inputs.items():
        input_idx = engine.get_binding_index(input_name)
        context.set_binding_shape(input_idx, Dims(host_array.shape))

    # Setup I/O bindings
    inputs, outputs, allocations = setup_io_bindings(engine, context)

    try:
        # Prepare the output data
        output = np.zeros(outputs[0]["shape"], outputs[0]["dtype"])

        # Copies go through ``cudart``: the module-level name ``cuda`` is bound
        # by the later ``from cuda import cuda, cudart`` import, so the pycuda
        # helpers ``memcpy_htod`` / ``memcpy_dtoh`` are not reachable via it.
        for position, host_array in enumerate(host_inputs.values()):
            binding = inputs[position]
            if host_array.nbytes > binding["nbytes"]:
                raise ValueError(
                    f"Input for binding '{binding['name']}' is {host_array.nbytes} bytes "
                    f"but the device buffer holds only {binding['nbytes']} bytes")
            err, = cudart.cudaMemcpy(
                binding["allocation"], host_array, host_array.nbytes,
                cudart.cudaMemcpyKind.cudaMemcpyHostToDevice)
            if err != cudart.cudaError_t.cudaSuccess:
                raise RuntimeError(
                    f"Host-to-device copy for binding '{binding['name']}' failed: {err}")

        torch.cuda.synchronize()
        time_start = time.perf_counter()
        context.execute_v2(allocations)
        torch.cuda.synchronize()
        time_each = time.perf_counter() - time_start

        err, = cudart.cudaMemcpy(
            output, outputs[0]["allocation"], outputs[0]["nbytes"],
            cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost)
        if err != cudart.cudaError_t.cudaSuccess:
            raise RuntimeError(f"Device-to-host copy of the output failed: {err}")
    finally:
        # Buffers are allocated per call; release them so repeated calls do
        # not exhaust device memory.
        for device_ptr in allocations:
            cudart.cudaFree(device_ptr)

    return output, time_each
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
# Copyright 2023 Graphcore Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging

import tensorrt
from tensorrt import Dims
from general_perf.backends.ILUVATAR.common import load_ixrt_plugin
load_ixrt_plugin()

from general_perf.backends.ILUVATAR.common import build_engine
from general_perf.tools import torch_to_onnx

from general_perf.backends import compile_backend

# Module-level logger for the ILUVATAR compile backend.
log = logging.getLogger("CompileBackendILUVATAR")


class CompileBackendILUVATAR(compile_backend.CompileBackend):
    """Compile backend that builds engines for ILUVATAR hardware."""

    def __init__(self):
        super().__init__()
        self.hardware_type = "ILUVATAR"
        self.need_reload = False
        self.model_runtimes = []
        self.model_config = None

    def version(self) -> str:
        """Return the version string of the ``tensorrt`` module in use."""
        return tensorrt.__version__

    def compile(self, configs, dataloader=None):
        """Build the engine(s) for the configured model and describe the result.

        For ``widedeep`` one engine is built per workload batch size, each
        written to a fixed path under the model zoo; every other model gets a
        single engine at ``configs['model_info']['engine_path']``.

        Args:
            configs: Dict with ``model_info`` and ``workload`` sections.
            dataloader: Unused.

        Returns:
            Dict describing the compiled model; it is also stored on
            ``self.configs`` and printed key by key.
        """
        model_info = configs["model_info"]
        model_name = model_info["model"].split("-")[0]
        max_batch_size = model_info['max_batch_size']
        onnx_model_path = model_info['onnx_model_path']
        engine_path = model_info['engine_path']

        # build engine
        if model_name != 'widedeep':
            build_engine(model_name=model_name, onnx_model_path=onnx_model_path,
                         engine_path=engine_path, MaxBatchSize=max_batch_size)
        else:
            for bs in configs['workload']['batch_sizes']:
                per_batch_engine = (
                    "general_perf/general_perf/model_zoo/regular/open_wide_deep_saved_model/"
                    f"widedeep_dynamicshape_sim_{bs}.engine"
                )
                build_engine(model_name=model_name, onnx_model_path=onnx_model_path,
                             engine_path=per_batch_engine, MaxBatchSize=bs)

        segment = {
            "sg_idx": 0,
            "is_fallback": False,
            "input_tensor_map": model_info["input_shape"],
            "output_tensor_map": model_info["outputs"],
            "compiled_model": [
                {
                    "compiled_bs": 1,
                    "compiled_obj": model_info['model_path'],
                },
            ],
        }

        result = {
            "model": model_info["model"],
            "engine_path": engine_path,
            "model_name": model_name,
            "framework": model_info["framework"],
            "framework_iluvatar": model_info["framework_iluvatar"],
            "compile_precision": model_info['model_precision'],
            "input_type": model_info["input_type"].split(","),
            "max_batch_size": max_batch_size,
            "compile_status": "success",
            "sg_percent": 100,
            "segments": [segment],
        }

        self.configs = result
        self.workload = configs['workload']
        self.model_info = model_info

        for key, value in result.items():
            print(f"{key}: {value}")

        return result

    def get_interact_profile(self, configs):
        """Return the interactive configuration entries; always an empty list."""
        return []

    def get_best_batch_size(self):
        """Return the best batch size for the model; always ``None`` here."""
        return None

    # The two functions below still need to be optimized; for now the ONNX
    # models in use have all been converted ahead of time.
    # to do
    def get_onnx(self, model_path, onnx_path):
        """Forward ``model_path`` and ``onnx_path`` to ``torch_to_onnx``."""
        torch_to_onnx(model_path, onnx_path)

    def pre_optimize(self, configs):
        """Point ``model_path`` at the pre-converted ONNX file for known models.

        Only ``resnet50`` and ``yolov5`` are rewritten; ``configs`` is modified
        in place and returned.
        """
        # todo: convert the pt model to an onnx model
        onnx_paths = {
            "resnet50": "general_perf/general_perf/model_zoo/regular/open_resnet50/resnet50.onnx",
            "yolov5": 'general_perf/general_perf/model_zoo/popular/open_yolov5/yolov5s_sim.onnx',
        }
        model_name = configs["model_info"]["model"].split("-")[0]

        if model_name in onnx_paths:
            configs["model_info"]["model_path"] = onnx_paths[model_name]

        return configs
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
transformers==4.35.2
datasets==2.14.7
onnx==1.15.0
pandas==2.1.3
Loading