Official TVM C++ deployment tutorial
https://github.com/apache/tvm/tree/main/apps/howto_deploy
The official docs say that running the run_example.sh script completes the deployment.
C++ deployment code:
https://github.com/apache/tvm/blob/main/apps/howto_deploy/cpp_deploy.cc
Makefile:
https://github.com/apache/tvm/blob/main/apps/howto_deploy/Makefile
Read the Makefile and the run_example.sh script together.
The script first creates a lib directory and then runs sudo make; what make actually does is defined by the Makefile.
make first builds lib/libtvm_runtime_pack.o, an all-in-one object file that bundles the whole TVM runtime.
It then runs prepare_test_libs.py to compile the example models into three libraries, test_addone_dll.so, test_addone_sys.o and test_relay_add.so, which cpp_deploy.cc loads, and finally produces two executables, cpp_deploy_pack and cpp_deploy_normal.
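For orientation, below is a minimal sketch of what cpp_deploy.cc does with one of those libraries. It is a simplified reconstruction, not the full file; the exported function name "addone" and the length-10 tensor shape are inferred from the example's naming and should be treated as illustrative.

#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

int main() {
  // Load the dynamically linked library produced by prepare_test_libs.py.
  tvm::runtime::Module mod = tvm::runtime::Module::LoadFromFile("lib/test_addone_dll.so");
  // Look up the packed function exported by that library.
  tvm::runtime::PackedFunc addone = mod.GetFunction("addone");

  // Allocate two float32 CPU tensors (dtype code 2 = kDLFloat, device_type 1 = kDLCPU).
  DLTensor* x = nullptr;
  DLTensor* y = nullptr;
  int64_t shape[1] = {10};
  TVMArrayAlloc(shape, 1, 2, 32, 1, 1, 0, &x);
  TVMArrayAlloc(shape, 1, 2, 32, 1, 1, 0, &y);
  for (int i = 0; i < 10; ++i) static_cast<float*>(x->data)[i] = i;

  addone(x, y);  // y[i] = x[i] + 1, computed by the TVM-generated kernel

  TVMArrayFree(x);
  TVMArrayFree(y);
  return 0;
}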
My goal is to take a deep learning network written in another framework, convert it with TVM into an .so file, and call it from C++ on the GPU. On the CPU the idea is simple: read the data in, set_input, run, then get_output. However, just changing the target to cuda does not make that code run on the GPU; it crashes with a core dump.
The reason is that running the model on the GPU requires allocating memory on the GPU and copying the input data over before running; the CPU code does none of this, hence the crash at runtime.
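A minimal sketch of the missing piece is shown below. Names such as x_gpu and host_input are made up for illustration; the constants follow the DLPack convention (device_type 2 = kDLGPU, dtype code 2 = kDLFloat).

#include <dlpack/dlpack.h>
#include <tvm/runtime/c_runtime_api.h>

#include <vector>

int main() {
  // The input starts out in host memory...
  std::vector<float> host_input(784, 0.0f);

  // ...but the DLTensor handed to set_input must live on the GPU:
  // shape (1, 784), float32 (code 2, 32 bits, 1 lane), device_type 2 = kDLGPU, device_id 0.
  int64_t in_shape[2] = {1, 784};
  DLTensor* x_gpu = nullptr;
  TVMArrayAlloc(in_shape, 2, 2, 32, 1, 2, 0, &x_gpu);

  // Explicit host -> device copy; skipping this (or passing a CPU-only buffer)
  // makes the graph runtime touch invalid device memory and the process core dumps.
  TVMArrayCopyFromBytes(x_gpu, host_input.data(), host_input.size() * sizeof(float));

  // ... set_input("x", x_gpu); run(); get_output(0, y_gpu);
  // then TVMArrayCopyToBytes(y_gpu, host_output, ...) to bring the result back to the host.

  TVMArrayFree(x_gpu);
  return 0;
}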
Below is the complete process of deploying with TVM in C++ and invoking the model on the GPU. The deep learning model is an MNIST handwritten-digit recognition network written in Keras and saved in pb format; the model code itself is omitted here, and we read the pb file directly and convert it. The model input shape is (1, 784) and the output shape is (1, 10).
Imports
import tvm
from tvm import te
from tvm import relay

# os and numpy
import numpy as np
import os.path

# Tensorflow imports
import tensorflow as tf

try:
    tf_compat_v1 = tf.compat.v1
except ImportError:
    tf_compat_v1 = tf

# Tensorflow utility functions
import tvm.relay.testing.tf as tf_testing
from tvm.contrib import graph_runtime
Parameter settings
# cpu
# target = "llvm"
# target_host = "llvm"
# layout = None
# ctx = tvm.cpu(0)

# gpu
target = "cuda"
target_host = 'llvm'
layout = "NCHW"
ctx = tvm.gpu(0)
Prepare the data
from tensorflow.python.keras.datasets import mnist
from tensorflow.python.keras.utils import np_utils

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_test1 = x_test.reshape(x_test.shape[0], x_test.shape[1] * x_test.shape[2])
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

x_train = x_train.reshape(x_train.shape[0], x_train.shape[1] * x_train.shape[2])
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1] * x_test.shape[2])
x_train = x_train / 255
x_test = x_test / 255
y_train = np_utils.to_categorical(y_train)
y_test = np_utils.to_categorical(y_test)
print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

# Dump test image 12 as raw 0-255 pixel values, one per line,
# so the C++ deployment code can read it back later.
with open("data.txt", 'w') as wf:
    for i in range(784):
        wf.write(str(x_test1[12][i]))
        wf.write('\n')
Load the model
with tf_compat_v1.gfile.GFile('./frozen_models/simple_frozen_graph.pb', "rb") as f:
    graph_def = tf_compat_v1.GraphDef()
    graph_def.ParseFromString(f.read())
    graph = tf.import_graph_def(graph_def, name="")
    # Call the utility to import the graph definition into default graph.
    graph_def = tf_testing.ProcessGraphDefParam(graph_def)
    # Add shapes to the graph.
    config = tf.compat.v1.ConfigProto(allow_soft_placement=True)
    with tf_compat_v1.Session() as sess:
        graph_def = tf_testing.AddShapesToGraphDef(sess, "Identity")

tensor_name_list = [tensor.name for tensor in tf.compat.v1.get_default_graph().as_graph_def().node]
for tensor_name in tensor_name_list:
    print(tensor_name, '\n')
Build the Relay module
shape_dict = {"x": x_train[0:1].shape}
print(shape_dict)
dtype_dict = {"x": "uint8"}
mod, params = relay.frontend.from_tensorflow(graph_def, layout=layout, shape=shape_dict)
print("Tensorflow protobuf imported to relay frontend.")
Compile to a TVM module
with tvm.transform.PassContext(opt_level=3):
lib = relay.build(mod, target=target, target_host=target_host, params=params)
Test that the TVM module works
from tvm.contrib import graph_runtime

# Read the image dumped to data.txt back in (one pixel value per line).
tt = np.zeros([1, 784])
i = 0
file = open("data.txt")
while 1:
    line = file.readline()
    if not line:
        break
    tt[0][i] = int(line)
    i += 1
file.close()

dtype = "float32"
m = graph_runtime.GraphModule(lib["default"](ctx))
# set inputs
m.set_input("x", tvm.nd.array(tt.astype(dtype)))
# execute
m.run()
# get outputs
tvm_output = m.get_output(0, tvm.nd.empty(((1, 10)), "float32"))
print(tvm_output.shape, tvm_output)
Save the model
from tvm.contrib import utils

temp = utils.tempdir()
path_lib = temp.relpath("/home/aiteam/test_code/model.so")
print(path_lib)
lib.export_library(path_lib)
print(temp.listdir())
Then go into the tvm/apps/howto_deploy directory and modify tvm_runtime_pack.cc by adding the following includes, so that the CUDA device API and CUDA module loader are compiled into the all-in-one runtime pack:
#include "../../src/runtime/cuda/cuda_device_api.cc"
#include "../../src/runtime/cuda/cuda_module.cc"
Then write another .cc file containing your own deployment code, and modify the Makefile to build it.
My file is named cpp_deploy_bkp.cc.
The modified Makefile:
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Makefile Example to deploy TVM modules.
TVM_ROOT=$(shell cd ../..; pwd)
DMLC_CORE=${TVM_ROOT}/3rdparty/dmlc-core

PKG_CFLAGS = -std=c++14 -g -fPIC\
	-I${TVM_ROOT}/include\
	-I${DMLC_CORE}/include\
	-I${TVM_ROOT}/3rdparty/dlpack/include\
	-I/usr/local/cuda/include

PKG_LDFLAGS = -L${TVM_ROOT}/build -ldl -pthread -L/usr/local/cuda/lib64 -lcudart -lcuda

.PHONY: clean all

all: lib/libtvm_runtime_pack.o lib/cpp_deploy_pack
#all: lib/cpp_deploy_pack lib/cpp_deploy_normal

# Build rule for all in one TVM package library
.PHONY: lib/libtvm_runtime_pack.o
lib/libtvm_runtime_pack.o: tvm_runtime_pack.cc
	@mkdir -p $(@D)
	$(CXX) -c $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS)

# Deploy using the all in one TVM package library
.PHONY: lib/cpp_deploy_pack
lib/cpp_deploy_pack: cpp_deploy_bkp.cc lib/libtvm_runtime_pack.o
	@mkdir -p $(@D)
	$(CXX) $(PKG_CFLAGS) -o $@ $^ $(PKG_LDFLAGS)
The key additions are the CUDA header path (-I/usr/local/cuda/include) and the CUDA library path plus libraries (-L/usr/local/cuda/lib64 -lcudart -lcuda).
cpp_deploy_bkp.cc
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*!
 * \brief Example code on loading and running a TVM module on the GPU.
 * \file cpp_deploy_bkp.cc
 */
#include <dlpack/dlpack.h>
#include <tvm/runtime/module.h>
#include <tvm/runtime/packed_func.h>
#include <tvm/runtime/registry.h>

#include <cstdio>
#include <fstream>
#include <sstream>
#include <string>

using namespace std;

template <class Type>
Type stringToNum(const string& str) {
  istringstream iss(str);
  Type num;
  iss >> num;
  return num;
}

void DeployGraphRuntime() {
  // Tensor description: float32 (dtype code 2, 32 bits, 1 lane), on GPU 0 (device_type 2 = kDLGPU).
  constexpr int dtype_code = 2U;
  constexpr int dtype_bits = 32;
  constexpr int dtype_lanes = 1;
  constexpr int device_type = 2;
  constexpr int device_id = 0;
  int ndim = 2;
  int64_t in_shape[2] = {1, 784};
  int64_t out_shape[2] = {1, 10};
  DLTensor* DLTX = nullptr;
  DLTensor* DLTY = nullptr;
  // Allocate the input and output tensors directly on the GPU.
  TVMArrayAlloc(in_shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &DLTX);
  TVMArrayAlloc(out_shape, ndim, dtype_code, dtype_bits, dtype_lanes, device_type, device_id, &DLTY);

  float img[784];
  float rslt[10];
  ifstream in("/home/aiteam/tiwang/data.txt");
  //int image[784];
  string s;
  int image_index = 0;
  /*
  while(getline(in,s))
  {
    image[i]=stringToNum<int>(s);
    ++i;
  }*/

  bool enabled = tvm::runtime::RuntimeEnabled("cuda");
  if (!enabled) {
    LOG(INFO) << "Skip heterogeneous test because cuda is not enabled." << "\n";
    return;
  }

  LOG(INFO) << "Running graph runtime...";
  // load in the library
  DLContext ctx{kDLGPU, 0};
  tvm::runtime::Module mod_factory =
      tvm::runtime::Module::LoadFromFile("/home/aiteam/test_code/model.so");
  // create the graph runtime module
  tvm::runtime::Module gmod = mod_factory.GetFunction("default")(ctx);
  tvm::runtime::PackedFunc set_input = gmod.GetFunction("set_input");
  tvm::runtime::PackedFunc get_output = gmod.GetFunction("get_output");
  tvm::runtime::PackedFunc run = gmod.GetFunction("run");

  // Use the C++ API: read the image from data.txt and normalize it to [0, 1].
  while (getline(in, s)) {
    if (image_index % 28 == 0) printf("\n");
    //static_cast<float*>(x->data)[image_index]=((float)stringToNum<int>(s))/255;
    img[image_index] = ((float)stringToNum<int>(s)) / 255;
    int a = stringToNum<int>(s);
    printf("%4d", a);
    image_index++;
  }
  // Copy the host-side buffer into the GPU input tensor.
  TVMArrayCopyFromBytes(DLTX, &img[0], image_index * sizeof(float));

  // set the right input
  set_input("x", DLTX);
  // run the code
  run();
  // get the output
  get_output(0, DLTY);
  // Copy the GPU output tensor back to the host.
  TVMArrayCopyToBytes(DLTY, &rslt[0], 10 * sizeof(float));

  for (int i = 0; i < 10; ++i) {
    LOG(INFO) << rslt[i];
    //LOG(INFO)<<static_cast<float*>(y->data)[i];
  }
}

int main(void) {
  //DeploySingleOp();
  DeployGraphRuntime();
  return 0;
}
Compared with the earlier CPU deployment code, GPU deployment adds the tensor-copy steps: TVMArrayCopyFromBytes to move the input from host memory to the GPU before running, and TVMArrayCopyToBytes to bring the output back.
Reference:
https://discuss.tvm.apache.org/t/deploy-nnvm-module-using-c-on-gpu-using-opencl-target/229
Final result
First, run sudo make in the tvm/apps/howto_deploy directory.
The build succeeds; then run the executable: ./lib/cpp_deploy_pack