tensoRT | CUDA support
Description
How to Reproduce
I tried to use simple onnx model which I exported from pytorch Resnet50 model and convert it to tensorRT in ade. But it says that ade doesn't have both cuda, pycuda and tensorRT. My question is whether it possible to install and use tensorRT in ade? Are there any guidelines for that? Or it's obliged to use only TVM.
from resnet_export_onnx import preprocess_image, postprocess
import torch
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
import numpy as np
ONNX_FILE_PATH = "resnet50.onnx"
# logger to capture errors, warnings, and other information during the build and inference phases
TRT_LOGGER = trt.Logger()
def build_engine(onnx_file_path):
# initialize TensorRT engine and parse ONNX model
builder = trt.Builder(TRT_LOGGER)
network = builder.create_network()
parser = trt.OnnxParser(network, TRT_LOGGER)
# allow TensorRT to use up to 1GB of GPU memory for tactic selection
builder.max_workspace_size = 1 << 30
# we have only one image in batch
builder.max_batch_size = 1
# use FP16 mode if possible
if builder.platform_has_fast_fp16:
builder.fp16_mode = True
# parse ONNX
with open(onnx_file_path, 'rb') as model:
print('Beginning ONNX file parsing')
parser.parse(model.read())
print('Completed parsing of ONNX file')
# generate TensorRT engine optimized for the target platform
print('Building an engine...')
engine = builder.build_cuda_engine(network)
context = engine.create_execution_context()
print("Completed creating Engine")
return engine, context
def main():
# initialize TensorRT engine and parse ONNX model
engine, context = build_engine(ONNX_FILE_PATH)
# get sizes of input and output and allocate memory required for input data and for output data
for binding in engine:
if engine.binding_is_input(binding): # we expect only one input
input_shape = engine.get_binding_shape(binding)
input_size = trt.volume(input_shape) * engine.max_batch_size * np.dtype(np.float32).itemsize # in bytes
device_input = cuda.mem_alloc(input_size)
else: # and one output
output_shape = engine.get_binding_shape(binding)
# create page-locked memory buffers (i.e. won't be swapped to disk)
host_output = cuda.pagelocked_empty(trt.volume(output_shape) * engine.max_batch_size, dtype=np.float32)
device_output = cuda.mem_alloc(host_output.nbytes)
# Create a stream in which to copy inputs/outputs and run inference.
stream = cuda.Stream()
# preprocess input data
host_input = np.array(preprocess_image("turkish_coffee.jpg").numpy(), dtype=np.float32, order='C')
cuda.memcpy_htod_async(device_input, host_input, stream)
# run inference
context.execute_async(bindings=[int(device_input), int(device_output)], stream_handle=stream.handle)
cuda.memcpy_dtoh_async(host_output, device_output, stream)
stream.synchronize()
# postprocess results
output_data = torch.Tensor(host_output).reshape(engine.max_batch_size, output_shape[0])
postprocess(output_data)
if __name__ == '__main__':
main()
Current Behavior
Traceback (most recent call last):
File "tensorRT_run.py", line 3, in <module>
import tensorrt as trt
ModuleNotFoundError: No module named 'tensorrt'
File "tensorRT_run.py", line 3, in <module>
import pycuda.driver as cuda
ModuleNotFoundError: No module named 'pycuda'
Expected behavior
Possibility to use cuda, tensorRT in ade (AutoWare).