本文基於TensorRT 5.0.2,對其自帶的introductory_parser_samples例子進行分析和介紹。
1 引言
假設當前路徑為:
TensorRT-5.0.2.6/samples
其對應當前例子文件目錄樹為:
# tree python
python/
├── common.py
├── introductory_parser_samples
│ ├── caffe_resnet50.py
│ ├── onnx_resnet50.py
│ ├── README.md
│ ├── requirements.txt
│ └── uff_resnet50.py
該例子展示如何使用TensorRT和包含的對應解析器(UFF,Caffe,ONNX解析器),基於在不同框架下訓練的ResNet-50結構來進行inference。
- caffe_resnet50: 該例子展示如何構建基於Caffe解析器去解析Caffe訓練的模型,並構建引擎然后進行inference;
- onnx_resnet50: 該例子展示如何基於開源的ONNX解析器去解析ONNX模型,並構建引擎進行inference;
- uff_resnet50: 該例子展示如何從一個UFF模型文件(從一個tf protobuf轉換過來)構建引擎,然后inference。
2 caffe_resnet50
所需要的文件內容包含:
/TensorRT-5.0.2.6/python/data/resnet50/
├── binoculars-cc0.jpeg
├── binoculars.jpeg
├── canon-cc0.jpeg
├── class_labels.txt
├── mug-cc0.jpeg
├── reflex_camera.jpeg
├── ResNet50_fp32.caffemodel
├── resnet50-infer-5.uff
├── ResNet50_N2.prototxt
├── ResNet50.onnx
└── tabby_tiger_cat.jpg
先上完整代碼,從main函數開始,逐個調用外部的函數完成整個流程,整個代碼還是挺簡單的:
# 該例子使用Caffe ResNet50 模型去創建一個TensorRT Inference Engine
import random
import argparse
from collections import namedtuple
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # 該import會讓pycuda自動管理CUDA上下文的創建和清理工作
import tensorrt as trt
import sys, os
# sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# 這里將common中的GiB和find_sample_data函數移動到該py文件中,保證自包含。
def GiB(val):
    """Return the number of bytes contained in ``val`` gibibytes.

    Shifting left by 30 bits multiplies by 2**30 (one GiB); for comparison,
    a 10-bit shift corresponds to KiB and a 20-bit shift to MiB.
    """
    bits_per_gib = 30
    return (val * 1) << bits_per_gib
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None, data_root=None):
    """Locate the sample data directory and resolve data files inside it.

    This self-contained copy does not parse command-line arguments; unless
    ``data_root`` is given, the data root is a hard-coded path.

    Args:
        description (str): Unused by this copy of the helper.
        subfolder (str): Sub-directory of the data root holding the data for
            this sample. If it does not exist, a warning is printed and the
            data root itself is used instead.
        find_files (list[str] | None): File names to look up. Each entry is
            replaced in place by its absolute path. ``None`` means no files.
        data_root (str | None): Directory to search. Defaults to
            ``/TensorRT-5.0.2.6/python/data/resnet50/``.

    Returns:
        str | tuple[str, list[str]]: The data directory alone when no files
        were requested, otherwise ``(data_directory, find_files)``.

    Raises:
        FileNotFoundError: If the data directory or a requested file is
            missing.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if find_files is None:
        find_files = []
    if data_root is None:
        data_root = "/TensorRT-5.0.2.6/python/data/resnet50/"
    data_root = os.path.abspath(data_root)

    subfolder_path = os.path.join(data_root, subfolder)
    if os.path.exists(subfolder_path):
        data_path = subfolder_path
    else:
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root
        if not os.path.exists(data_path):
            raise FileNotFoundError(data_path + " does not exist.")

    # Resolve every requested file, failing on the first one that is missing.
    for index, name in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, name))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. ")

    if find_files:
        return data_path, find_files
    return data_path
#-----------------
# Immutable description of the Caffe ResNet-50 model used by this sample.
_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'DEPLOY_PATH', 'INPUT_SHAPE', 'OUTPUT_NAME', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="ResNet50_fp32.caffemodel",
    DEPLOY_PATH="ResNet50_N2.prototxt",
    INPUT_SHAPE=(3, 224, 224),
    OUTPUT_NAME="prob",
    # TensorRT data types can be converted to numpy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger shared by the builder; created at WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
'''main中第二步:構建一個tensorRT engine '''
# The Caffe path is used for Caffe2 models.
def build_engine_caffe(model_file, deploy_file):
    """Build a TensorRT CUDA engine from a Caffe model.

    Args:
        model_file: Path of the Caffe model file passed to the parser as
            ``model``.
        deploy_file: Path of the deploy file passed to the parser as
            ``deploy``.

    Returns:
        Whatever ``builder.build_cuda_engine(network)`` returns.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network, trt.CaffeParser() as parser:
            # Upper bound on the memory the builder may use while building
            # the engine.
            builder.max_workspace_size = GiB(1)
            # Parsing populates ``network`` and returns an object from which
            # tensors can be looked up by name.
            blob_lookup = parser.parse(deploy=deploy_file, model=model_file, network=network, dtype=ModelData.DTYPE)
            # With Caffe the network output has to be marked by hand; the
            # output tensor name is known in advance.
            output_tensor = blob_lookup.find(ModelData.OUTPUT_NAME)
            network.mark_output(output_tensor)
            return builder.build_cuda_engine(network)
'''main中第三步:分配host和device端的buffers,然后創建一個流 '''
def allocate_buffers(engine):
    """Allocate host and device buffers for bindings 0 and 1, plus a stream.

    Binding 0 is used as the input and binding 1 as the output.

    Returns:
        tuple: ``(h_input, d_input, h_output, d_output, stream)``.
    """
    host_dtype = trt.nptype(ModelData.DTYPE)
    input_volume = trt.volume(engine.get_binding_shape(0))
    output_volume = trt.volume(engine.get_binding_shape(1))

    # Page-locked host buffers (i.e. they won't be swapped to disk).
    h_input = cuda.pagelocked_empty(input_volume, dtype=host_dtype)
    h_output = cuda.pagelocked_empty(output_volume, dtype=host_dtype)

    # Device buffers sized to match their host counterparts.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the copies and for running inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
'''main中第四步:讀取測試樣本,並歸一化 '''
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load an image file into ``pagelocked_buffer`` as a flat CHW array.

    Despite the name, this variant applies no value scaling: the image is
    only resized, transposed to channel-first order, cast and flattened.

    Args:
        test_image: Path of the image to open.
        pagelocked_buffer: Destination array that receives the pixel data.

    Returns:
        ``test_image``, unchanged.
    """
    image = Image.open(test_image)
    _, height, width = ModelData.INPUT_SHAPE
    resized = image.resize((width, height), Image.ANTIALIAS)
    # HWC -> CHW, then cast to the model dtype and flatten.
    channel_first = np.asarray(resized).transpose([2, 0, 1])
    flat_pixels = channel_first.astype(trt.nptype(ModelData.DTYPE)).ravel()
    np.copyto(pagelocked_buffer, flat_pixels)
    return test_image
'''main中第五步:執行inference '''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Run one asynchronous inference pass and wait for it to finish.

    The input is copied host -> device, the context is executed, and the
    output is copied device -> host, all queued on ``stream``. The result is
    available in ``h_output`` once this function returns.
    """
    device_bindings = [int(d_input), int(d_output)]
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Queue the inference itself.
    context.execute_async(bindings=device_bindings, stream_handle=stream.handle)
    # Device -> host copy of the result.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Block until everything queued on the stream has completed.
    stream.synchronize()
def main():
    """Classify one randomly chosen test image with a Caffe-built engine.

    Prints whether the predicted label matches the image's file name.
    """
    # 1 - Locate the test images, the model files and the label list.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    ModelData.DEPLOY_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test images
    caffe_model_file, caffe_deploy_file, labels_file = data_files[3:]
    # Read the labels inside a context manager so the file handle is closed.
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # 2 - Build a TensorRT engine from the Caffe model.
    with build_engine_caffe(caffe_model_file, caffe_deploy_file) as engine:
        # The inference steps below are the same whichever parser built the
        # engine.
        # 3 - Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:
            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # 5 - Run the engine; h_output receives one score per class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]
            # The prediction counts as correct when the label, with spaces
            # replaced by underscores, occurs in the image's base file name.
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()
3 onnx_resnet50
從下面的代碼和上面例子代碼進行對比,發現還是相對一致的流程,就是其中個別函數有所不同。
# 該例子使用ONNX ResNet50 模型去創建一個TensorRT Inference Engine
import random
from PIL import Image
from collections import namedtuple
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # 該import會讓pycuda自動管理CUDA上下文的創建和清理工作
import tensorrt as trt
import sys, os
# import common
# 這里將common中的GiB和find_sample_data函數移動到該py文件中,保證自包含。
def GiB(val):
    """Return the number of bytes contained in ``val`` gibibytes.

    Shifting left by 30 bits multiplies by 2**30 (one GiB); for comparison,
    a 10-bit shift corresponds to KiB and a 20-bit shift to MiB.
    """
    bits_per_gib = 30
    return (val * 1) << bits_per_gib
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None, data_root=None):
    """Locate the sample data directory and resolve data files inside it.

    This self-contained copy does not parse command-line arguments; unless
    ``data_root`` is given, the data root is a hard-coded path.

    Args:
        description (str): Unused by this copy of the helper.
        subfolder (str): Sub-directory of the data root holding the data for
            this sample. If it does not exist, a warning is printed and the
            data root itself is used instead.
        find_files (list[str] | None): File names to look up. Each entry is
            replaced in place by its absolute path. ``None`` means no files.
        data_root (str | None): Directory to search. Defaults to
            ``/TensorRT-5.0.2.6/python/data/resnet50/``.

    Returns:
        str | tuple[str, list[str]]: The data directory alone when no files
        were requested, otherwise ``(data_directory, find_files)``.

    Raises:
        FileNotFoundError: If the data directory or a requested file is
            missing.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if find_files is None:
        find_files = []
    if data_root is None:
        data_root = "/TensorRT-5.0.2.6/python/data/resnet50/"
    data_root = os.path.abspath(data_root)

    subfolder_path = os.path.join(data_root, subfolder)
    if os.path.exists(subfolder_path):
        data_path = subfolder_path
    else:
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root
        if not os.path.exists(data_path):
            raise FileNotFoundError(data_path + " does not exist.")

    # Resolve every requested file, failing on the first one that is missing.
    for index, name in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, name))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. ")

    if find_files:
        return data_path, find_files
    return data_path
#-----------------
# Immutable description of the ONNX ResNet-50 model used by this sample.
_ModelData = namedtuple(
    '_ModelData',
    ['MODEL_PATH', 'INPUT_SHAPE', 'DTYPE'],
)
ModelData = _ModelData(
    MODEL_PATH="ResNet50.onnx",
    INPUT_SHAPE=(3, 224, 224),
    # TensorRT data types can be converted to numpy types with trt.nptype().
    DTYPE=trt.float32,
)

# Logger shared by the builder and the parser; created at WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
'''main中第二步:構建一個tensorRT engine '''
# The Onnx path is used for Onnx models.
def build_engine_onnx(model_file):
    """Build a TensorRT CUDA engine from a serialized ONNX model.

    Args:
        model_file (str): Path of the ONNX model file.

    Returns:
        Whatever ``builder.build_cuda_engine(network)`` returns.

    Raises:
        RuntimeError: If the ONNX parser reports that parsing failed.
    """
    with trt.Builder(TRT_LOGGER) as builder, \
            builder.create_network() as network, \
            trt.OnnxParser(network, TRT_LOGGER) as parser:
        # Upper bound on the memory the builder may use while building the
        # engine.
        builder.max_workspace_size = GiB(1)
        # Load the ONNX model and parse it to populate the TensorRT network.
        with open(model_file, 'rb') as model:
            # parse() signals failure through its boolean result; surface it
            # here instead of building an engine from a half-filled network.
            if not parser.parse(model.read()):
                raise RuntimeError("Failed to parse the ONNX model: " + str(model_file))
        return builder.build_cuda_engine(network)
'''main中第三步:分配host和device端的buffers,然后創建一個流 '''
def allocate_buffers(engine):
    """Allocate host and device buffers for bindings 0 and 1, plus a stream.

    Binding 0 is used as the input and binding 1 as the output.

    Returns:
        tuple: ``(h_input, d_input, h_output, d_output, stream)``.
    """
    host_dtype = trt.nptype(ModelData.DTYPE)
    input_volume = trt.volume(engine.get_binding_shape(0))
    output_volume = trt.volume(engine.get_binding_shape(1))

    # Page-locked host buffers (i.e. they won't be swapped to disk).
    h_input = cuda.pagelocked_empty(input_volume, dtype=host_dtype)
    h_output = cuda.pagelocked_empty(output_volume, dtype=host_dtype)

    # Device buffers sized to match their host counterparts.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the copies and for running inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
'''main中第四步:讀取測試樣本,並歸一化 '''
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load and normalize an image into ``pagelocked_buffer`` as flat CHW.

    Unlike the Caffe variant of this helper, the ONNX ResNet50 model needs
    preprocessing: pixel values are scaled by 1/255, shifted by 0.45 and
    divided by 0.225.

    Args:
        test_image: Path of the image to open.
        pagelocked_buffer: Destination array that receives the pixel data.

    Returns:
        ``test_image``, unchanged.
    """
    image = Image.open(test_image)
    _, height, width = ModelData.INPUT_SHAPE
    resized = image.resize((width, height), Image.ANTIALIAS)
    # HWC -> CHW, then cast to the model dtype and flatten.
    channel_first = np.asarray(resized).transpose([2, 0, 1])
    flat_pixels = channel_first.astype(trt.nptype(ModelData.DTYPE)).ravel()
    # Mean/scale normalization required by this ResNet50 model.
    scaled = flat_pixels / 255.0
    normalized = (scaled - 0.45) / 0.225
    np.copyto(pagelocked_buffer, normalized)
    return test_image
'''main中第五步:執行inference '''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Run one asynchronous inference pass and wait for it to finish.

    The input is copied host -> device, the context is executed, and the
    output is copied device -> host, all queued on ``stream``. The result is
    available in ``h_output`` once this function returns.
    """
    device_bindings = [int(d_input), int(d_output)]
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Queue the inference itself.
    context.execute_async(bindings=device_bindings, stream_handle=stream.handle)
    # Device -> host copy of the result.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Block until everything queued on the stream has completed.
    stream.synchronize()
def main():
    """Classify one randomly chosen test image with an ONNX-built engine.

    Prints whether the predicted label matches the image's file name.
    """
    # 1 - Locate the test images, the model file and the label list.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test images
    onnx_model_file, labels_file = data_files[3:]
    # Read the labels inside a context manager so the file handle is closed.
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # 2 - Build a TensorRT engine from the ONNX model.
    with build_engine_onnx(onnx_model_file) as engine:
        # The inference steps below are the same whichever parser built the
        # engine, since every sample uses the ResNet-50 architecture.
        # 3 - Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:
            # Pick a test image, normalize it and load it into the
            # page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # 5 - Run the engine; h_output receives one score per class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]
            # The prediction counts as correct when the label, with spaces
            # replaced by underscores, occurs in the image's base file name.
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()
4 uff_resnet50
從下面的例子可以看出,這三個例子流程大致一致,只有個別區域有少許變化。
UFF是TensorRT內部使用的統一框架格式,用於表示優化前的網絡結構圖,可以將諸如pb等模型格式先轉換成uff格式(參見NVIDIA博文《tensorrt-3-faster-tensorflow-inference》)。
# 該例子使用UFF ResNet50 模型去創建一個TensorRT Inference Engine
import random
from PIL import Image
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit # 該import會讓pycuda自動管理CUDA上下文的創建和清理工作
import tensorrt as trt
import sys, os
#sys.path.insert(1, os.path.join(sys.path[0], ".."))
# import common
# 這里將common中的GiB和find_sample_data函數移動到該py文件中,保證自包含。
def GiB(val):
    """Return the number of bytes contained in ``val`` gibibytes.

    Shifting left by 30 bits multiplies by 2**30 (one GiB); for comparison,
    a 10-bit shift corresponds to KiB and a 20-bit shift to MiB.
    """
    bits_per_gib = 30
    return (val * 1) << bits_per_gib
def find_sample_data(description="Runs a TensorRT Python sample", subfolder="", find_files=None, data_root=None):
    """Locate the sample data directory and resolve data files inside it.

    This self-contained copy does not parse command-line arguments; unless
    ``data_root`` is given, the data root is a hard-coded path.

    Args:
        description (str): Unused by this copy of the helper.
        subfolder (str): Sub-directory of the data root holding the data for
            this sample. If it does not exist, a warning is printed and the
            data root itself is used instead.
        find_files (list[str] | None): File names to look up. Each entry is
            replaced in place by its absolute path. ``None`` means no files.
        data_root (str | None): Directory to search. Defaults to
            ``/TensorRT-5.0.2.6/python/data/resnet50/``.

    Returns:
        str | tuple[str, list[str]]: The data directory alone when no files
        were requested, otherwise ``(data_directory, find_files)``.

    Raises:
        FileNotFoundError: If the data directory or a requested file is
            missing.
    """
    # A ``None`` sentinel avoids sharing one mutable default list across calls.
    if find_files is None:
        find_files = []
    if data_root is None:
        data_root = "/TensorRT-5.0.2.6/python/data/resnet50/"
    data_root = os.path.abspath(data_root)

    subfolder_path = os.path.join(data_root, subfolder)
    if os.path.exists(subfolder_path):
        data_path = subfolder_path
    else:
        print("WARNING: " + subfolder_path + " does not exist. Using " + data_root + " instead.")
        data_path = data_root
        if not os.path.exists(data_path):
            raise FileNotFoundError(data_path + " does not exist.")

    # Resolve every requested file, failing on the first one that is missing.
    for index, name in enumerate(find_files):
        find_files[index] = os.path.abspath(os.path.join(data_path, name))
        if not os.path.exists(find_files[index]):
            raise FileNotFoundError(find_files[index] + " does not exist. ")

    if find_files:
        return data_path, find_files
    return data_path
#-----------------
class ModelData(object):
    """Static description of the UFF ResNet-50 model used by this sample."""

    MODEL_PATH = "resnet50-infer-5.uff"
    INPUT_NAME = "input"
    INPUT_SHAPE = (3, 224, 224)
    OUTPUT_NAME = "GPU_0/tower_0/Softmax"
    # TensorRT data types can be converted to numpy types with trt.nptype().
    DTYPE = trt.float32


# NOTE: ModelData used to be rebound right here through collections.namedtuple
# with the very same field values. This script never imports namedtuple, so
# that rebinding raised NameError; the class above is the single definition.

# Logger shared by the builder; created at WARNING severity.
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
'''main中第二步:構建一個tensorRT engine '''
# The UFF path is used for TensorFlow models. You can convert a frozen TensorFlow graph to UFF using the included convert-to-uff utility.
def build_engine_uff(model_file):
    """Build a TensorRT CUDA engine from a UFF model file.

    Args:
        model_file: Path of the UFF model handed to the parser.

    Returns:
        Whatever ``builder.build_cuda_engine(network)`` returns.
    """
    with trt.Builder(TRT_LOGGER) as builder:
        with builder.create_network() as network, trt.UffParser() as parser:
            # Upper bound on the memory the builder may use while building
            # the engine.
            builder.max_workspace_size = GiB(1)
            # With UFF the input and output nodes are registered by hand.
            parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
            parser.register_output(ModelData.OUTPUT_NAME)
            # Load the UFF model and parse it to populate the network.
            parser.parse(model_file, network)
            return builder.build_cuda_engine(network)
'''main中第三步:分配host和device端的buffers,然后創建一個流 '''
def allocate_buffers(engine):
    """Allocate host and device buffers for bindings 0 and 1, plus a stream.

    Binding 0 is used as the input and binding 1 as the output.

    Returns:
        tuple: ``(h_input, d_input, h_output, d_output, stream)``.
    """
    host_dtype = trt.nptype(ModelData.DTYPE)
    input_volume = trt.volume(engine.get_binding_shape(0))
    output_volume = trt.volume(engine.get_binding_shape(1))

    # Page-locked host buffers (i.e. they won't be swapped to disk).
    h_input = cuda.pagelocked_empty(input_volume, dtype=host_dtype)
    h_output = cuda.pagelocked_empty(output_volume, dtype=host_dtype)

    # Device buffers sized to match their host counterparts.
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)

    # Stream used for the copies and for running inference.
    stream = cuda.Stream()
    return h_input, d_input, h_output, d_output, stream
'''main中第四步:讀取測試樣本,並歸一化 '''
def load_normalized_test_case(test_image, pagelocked_buffer):
    """Load an image file into ``pagelocked_buffer`` as a flat CHW array.

    Despite the name, this variant applies no value scaling: the image is
    only resized, transposed to channel-first order, cast and flattened.

    Args:
        test_image: Path of the image to open.
        pagelocked_buffer: Destination array that receives the pixel data.

    Returns:
        ``test_image``, unchanged.
    """
    image = Image.open(test_image)
    _, height, width = ModelData.INPUT_SHAPE
    resized = image.resize((width, height), Image.ANTIALIAS)
    # HWC -> CHW, then cast to the model dtype and flatten.
    channel_first = np.asarray(resized).transpose([2, 0, 1])
    flat_pixels = channel_first.astype(trt.nptype(ModelData.DTYPE)).ravel()
    np.copyto(pagelocked_buffer, flat_pixels)
    return test_image
'''main中第五步:執行inference '''
def do_inference(context, h_input, d_input, h_output, d_output, stream):
    """Run one asynchronous inference pass and wait for it to finish.

    The input is copied host -> device, the context is executed, and the
    output is copied device -> host, all queued on ``stream``. The result is
    available in ``h_output`` once this function returns.
    """
    device_bindings = [int(d_input), int(d_output)]
    # Host -> device copy of the input.
    cuda.memcpy_htod_async(d_input, h_input, stream)
    # Queue the inference itself.
    context.execute_async(bindings=device_bindings, stream_handle=stream.handle)
    # Device -> host copy of the result.
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    # Block until everything queued on the stream has completed.
    stream.synchronize()
def main():
    """Classify one randomly chosen test image with a UFF-built engine.

    Prints whether the predicted label matches the image's file name.
    """
    # 1 - Locate the test images, the model file and the label list.
    _, data_files = find_sample_data(
        description="Runs a ResNet50 network with a TensorRT inference engine.",
        subfolder="resnet50",
        find_files=["binoculars.jpeg",
                    "reflex_camera.jpeg",
                    "tabby_tiger_cat.jpg",
                    ModelData.MODEL_PATH,
                    "class_labels.txt"])
    test_images = data_files[0:3]  # the three test images
    uff_model_file, labels_file = data_files[3:]
    # Read the labels inside a context manager so the file handle is closed.
    with open(labels_file, 'r') as labels_fh:
        labels = labels_fh.read().split('\n')

    # 2 - Build a TensorRT engine from the UFF model.
    with build_engine_uff(uff_model_file) as engine:
        # The inference steps below are the same whichever parser built the
        # engine, since every sample uses the ResNet-50 architecture.
        # 3 - Allocate buffers and create a CUDA stream.
        h_input, d_input, h_output, d_output, stream = allocate_buffers(engine)
        # 4 - The execution context is what actually runs inference.
        with engine.create_execution_context() as context:
            # Pick a test image and load it into the page-locked host buffer.
            test_image = random.choice(test_images)
            test_case = load_normalized_test_case(test_image, h_input)
            # 5 - Run the engine; h_output receives one score per class.
            do_inference(context, h_input, d_input, h_output, d_output, stream)
            # Map the index of the highest score to its label.
            pred = labels[np.argmax(h_output)]
            # The prediction counts as correct when the label, with spaces
            # replaced by underscores, occurs in the image's base file name.
            if "_".join(pred.split()) in os.path.splitext(os.path.basename(test_case))[0]:
                print("Correctly recognized " + test_case + " as " + pred)
            else:
                print("Incorrectly recognized " + test_case + " as " + pred)


if __name__ == '__main__':
    main()
.