ONNX Model Deployment: TensorRT, OpenVINO, ONNXRuntime, OpenCV dnn


No matter which framework a model was trained in, converting it to ONNX format is recommended; it makes deployment much easier.

Frameworks that can run ONNX models:

  • TensorRT: NVIDIA's engine for GPU-accelerated inference. Note that it requires NVIDIA GPU hardware.
  • OpenVINO: Intel's engine for CPU-accelerated inference. Note that it requires Intel CPU hardware.
  • ONNXRuntime: developed jointly by Microsoft, Amazon, Facebook, IBM and others; runs on both GPU and CPU.
  • OpenCV dnn: OpenCV's module for loading and running models.

Models saved in .pt format can also be deployed with the PyTorch framework itself.

In terms of inference speed, roughly: TensorRT > OpenVINO > ONNXRuntime > OpenCV dnn > PyTorch.

Since my machine only has a CPU, this post looks at using OpenVINO, ONNXRuntime, and OpenCV dnn from C++ (ONNXRuntime in detail below; minimal sketches for OpenCV dnn and OpenVINO follow the reference links).

【ONNXRuntime C++】

The model expects mini-batches of 3-channel RGB images of shape (N x 3 x H x W), where N is the batch size, and H and W are expected to be at least 224.

 

#include <onnxruntime_cxx_api.h>

#include <opencv2/dnn/dnn.hpp>
#include <opencv2/imgcodecs.hpp>
#include <opencv2/imgproc.hpp>

#include <algorithm>
#include <chrono>
#include <cmath>
#include <exception>
#include <fstream>
#include <iostream>
#include <limits>
#include <numeric>
#include <string>
#include <vector>

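/**
 * @brief Product of all elements in a vector, e.g. the element count of a
 * tensor given its shape
 */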
template <typename T>
T vectorProduct(const std::vector<T>& v)
{
    return std::accumulate(v.begin(), v.end(), T{1}, std::multiplies<T>());
}

/**
 * @brief Operator overloading for printing vectors
 * @tparam T
 * @param os
 * @param v
 * @return std::ostream&
 */
template <typename T>
std::ostream& operator<<(std::ostream& os, const std::vector<T>& v)
{
    os << "[";
    for (size_t i = 0; i < v.size(); ++i)
    {
        os << v[i];
        if (i != v.size() - 1)
        {
            os << ", ";
        }
    }
    os << "]";
    return os;
}

/**
 * @brief Print ONNX tensor data type
 * https://github.com/microsoft/onnxruntime/blob/rel-1.6.0/include/onnxruntime/core/session/onnxruntime_c_api.h#L93
 * @param os
 * @param type
 * @return std::ostream&
 */
std::ostream& operator<<(std::ostream& os,
    const ONNXTensorElementDataType& type)
{
    switch (type)
    {
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UNDEFINED:
        os << "undefined";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT:
        os << "float";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT8:
        os << "uint8_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT8:
        os << "int8_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT16:
        os << "uint16_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT16:
        os << "int16_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32:
        os << "int32_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_INT64:
        os << "int64_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING:
        os << "std::string";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL:
        os << "bool";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT16:
        os << "float16";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_DOUBLE:
        os << "double";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT32:
        os << "uint32_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_UINT64:
        os << "uint64_t";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX64:
        os << "float real + float imaginary";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_COMPLEX128:
        os << "double real + double imaginary";
        break;
    case ONNXTensorElementDataType::ONNX_TENSOR_ELEMENT_DATA_TYPE_BFLOAT16:
        os << "bfloat16";
        break;
    default:
        break;
    }

    return os;
}

std::vector<std::string> readLabels(std::string& labelFilepath)
{
    std::vector<std::string> labels;
    std::string line;
    std::ifstream fp(labelFilepath);
    while (std::getline(fp, line))
    {
        labels.push_back(line);
    }
    return labels;
}

int main()
{
    bool useCUDA{ false };

    std::string instanceName{ "image-classification-inference" };
    std::string modelFilepath{ "best-sim.onnx" };
    std::string imageFilepath{ "D:/barcode.jpg" };
    std::string labelFilepath{ "label.txt" };
    // read the label list from a txt file
    std::vector<std::string> labels{ readLabels(labelFilepath) };
    // set the logging severity level (ONNX Runtime requires one)
    Ort::Env env(OrtLoggingLevel::ORT_LOGGING_LEVEL_WARNING, instanceName.c_str());
    Ort::SessionOptions sessionOptions;
    //sessionOptions.SetIntraOpNumThreads(1);
    if (useCUDA)
    {
        // Using CUDA backend
        // https://github.com/microsoft/onnxruntime/blob/v1.8.2/include/onnxruntime/core/session/onnxruntime_cxx_api.h#L329
        //OrtCUDAProviderOptions cuda_options{ 0 };
        //sessionOptions.AppendExecutionProvider_CUDA(cuda_options);
    }

    // Set the graph optimization level.
    // Available levels are
    // ORT_DISABLE_ALL -> To disable all optimizations
    // ORT_ENABLE_BASIC -> To enable basic optimizations (Such as redundant node removals) 
    // ORT_ENABLE_EXTENDED -> To enable extended optimizations (Includes level 1 + more complex optimizations like node fusions)
    // ORT_ENABLE_ALL -> To Enable All possible optimizations
    sessionOptions.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);
    // load the model
    std::wstring widestr = std::wstring(modelFilepath.begin(), modelFilepath.end()); // string -> wstring (the Windows ORT API takes a wide-char path; this naive conversion is only safe for ASCII paths)
    Ort::Session session(env, widestr.c_str(), sessionOptions);

    Ort::AllocatorWithDefaultOptions allocator;

    size_t numInputNodes = session.GetInputCount();
    size_t numOutputNodes = session.GetOutputCount();
    std::cout << "Number of Input Nodes: " << numInputNodes << std::endl;
    std::cout << "Number of Output Nodes: " << numOutputNodes << std::endl;

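    // note: ONNX Runtime >= 1.13 deprecates GetInputName/GetOutputName in favor of
    // GetInputNameAllocated/GetOutputNameAllocated; the calls below match older releases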
    const char* inputName = session.GetInputName(0, allocator);
    std::cout << "Input Name: " << inputName << std::endl;

    Ort::TypeInfo inputTypeInfo = session.GetInputTypeInfo(0);
    auto inputTensorInfo = inputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType inputType = inputTensorInfo.GetElementType();
    std::cout << "Input Type: " << inputType << std::endl;
    std::vector<int64_t> inputDims = inputTensorInfo.GetShape();
    std::cout << "Input Dimensions: " << inputDims << std::endl;
    const char* outputName = session.GetOutputName(0, allocator);
    std::cout << "Output Name: " << outputName << std::endl;

    Ort::TypeInfo outputTypeInfo = session.GetOutputTypeInfo(0);
    auto outputTensorInfo = outputTypeInfo.GetTensorTypeAndShapeInfo();

    ONNXTensorElementDataType outputType = outputTensorInfo.GetElementType();
    std::cout << "Output Type: " << outputType << std::endl;
    std::vector<int64_t> outputDims = outputTensorInfo.GetShape();
    std::cout << "Output Dimensions: " << outputDims << std::endl;

    // [Image preprocessing] ----------------------------------------------------------------------[begin]
    // Pipeline: BGR image -> resize -> convert to RGB -> normalize to [0,1] -> HWC to CHW -> std::vector<float> (the model input)
    // OpenCV loads images as HWC, while PyTorch uses CHW (its cuda/cudnn backends are designed around CHW, which suits convolutions)
    cv::Mat imageBGR = cv::imread(imageFilepath, cv::ImreadModes::IMREAD_COLOR);
    cv::Mat preprocessedImage;      // RRR-GGG-BBB order
    // resize -> BGR to RGB -> normalize [0,1] -> HWC to CHW; note cv::Size is (width, height), i.e. (dims[3], dims[2]) for an NCHW input
    cv::dnn::blobFromImage(imageBGR, preprocessedImage, 1 / 255.0, cv::Size(static_cast<int>(inputDims.at(3)), static_cast<int>(inputDims.at(2))), cv::Scalar(0, 0, 0), true, false);
    // flatten the 4D float blob into a one-dimensional std::vector<float>
    std::vector<float> inputTensorValues;
    inputTensorValues.assign(preprocessedImage.begin<float>(), preprocessedImage.end<float>());
    // [Image preprocessing] ----------------------------------------------------------------------[end]

    size_t outputTensorSize = vectorProduct(outputDims);
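    // note: if the model has dynamic dimensions (-1 in the shape), fix them before computing this size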
    //assert(("Output tensor size should equal to the label set size.", labels.size() == outputTensorSize));
    std::vector<float> outputTensorValues(outputTensorSize);

    std::vector<const char*> inputNames{ inputName };
    std::vector<const char*> outputNames{ outputName };
    //std::vector<Ort::Value> inputTensors;
    //std::vector<Ort::Value> outputTensors;

    Ort::MemoryInfo memoryInfo = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemType::OrtMemTypeDefault);
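    // CreateTensor does not copy the data: it wraps the existing inputTensorValues
    // buffer, so that vector must stay alive for every session.Run() that uses it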
    Ort::Value inputTensor = Ort::Value::CreateTensor<float>(memoryInfo, inputTensorValues.data(), inputTensorValues.size(), inputDims.data(), inputDims.size());
    // run inference; the returned vector holds one Ort::Value per requested output
    std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
    // copy the raw output floats into outputTensorValues, which the post-processing below reads
    float* rawOutput = ort_outputs[0].GetTensorMutableData<float>();
    std::copy(rawOutput, rawOutput + outputTensorSize, outputTensorValues.begin());

    // drawing detection boxes: still under investigation...
    // get the predicted class index, label, and (softmax) confidence
    int predId = 0;
    float activation = 0;
    float maxActivation = std::numeric_limits<float>::lowest();
    float expSum = 0;
    for (int i = 0; i < static_cast<int>(labels.size()); i++)
    {
        activation = outputTensorValues.at(i);
        expSum += std::exp(activation);
        if (activation > maxActivation)
        {
            predId = i;
            maxActivation = activation;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
    std::cout << "Predicted Label: " << labels.at(predId) << std::endl;
    std::cout << "Uncalibrated Confidence: " << std::exp(maxActivation) / expSum << std::endl;

    // average inference latency over 100 runs
    int numTests{ 100 };
    std::chrono::steady_clock::time_point begin = std::chrono::steady_clock::now();
    for (int i = 0; i < numTests; i++)
    {
        session.Run(Ort::RunOptions{ nullptr }, inputNames.data(), &inputTensor, inputNames.size(), outputNames.data(), outputNames.size());
    }
    std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
    std::cout << "Mean Inference Latency: "
        << std::chrono::duration_cast<std::chrono::milliseconds>(end - begin).count() / static_cast<float>(numTests)
        << " ms" << std::endl;
}
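
To build this, you need the ONNX Runtime C++ headers and library plus OpenCV; on Windows that typically means linking onnxruntime.lib (with onnxruntime.dll next to the executable) and the OpenCV libraries.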

 

Blogger leimao's example: leimao/ONNX-Runtime-Inference: ONNX Runtime Inference C++ Example (github.com)

Official examples: onnxruntime-inference-examples/c_cxx at main · microsoft/onnxruntime-inference-examples (github.com)

Another blogger's YOLOv5 deployment: https://github.com/iwanggp/yolov5_onnxruntime_deploy
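
【OpenCV dnn C++】

For comparison, here is a minimal OpenCV dnn sketch of the same classification flow. The model and image paths are reused from the example above, and the input size is assumed to be 224x224; treat this as an untested outline rather than a finished program.

#include <opencv2/dnn.hpp>
#include <opencv2/imgcodecs.hpp>

#include <iostream>

int main()
{
    // load the ONNX model directly into OpenCV's dnn module
    cv::dnn::Net net = cv::dnn::readNetFromONNX("best-sim.onnx");
    cv::Mat imageBGR = cv::imread("D:/barcode.jpg", cv::IMREAD_COLOR);
    // same preprocessing as before: normalize to [0,1], resize, BGR->RGB, HWC->CHW
    cv::Mat blob = cv::dnn::blobFromImage(imageBGR, 1 / 255.0, cv::Size(224, 224), cv::Scalar(0, 0, 0), true, false);
    net.setInput(blob);
    cv::Mat out = net.forward();    // 1 x numClasses for a classifier
    // argmax over the class scores
    cv::Point classIdPoint;
    double maxScore;
    cv::minMaxLoc(out.reshape(1, 1), nullptr, &maxScore, nullptr, &classIdPoint);
    std::cout << "Predicted Label ID: " << classIdPoint.x << " (score " << maxScore << ")" << std::endl;
}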
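
【OpenVINO C++】

And a minimal sketch with the OpenVINO 2.0 C++ API (ov::Core and friends, available since the 2022.1 release). Again an untested outline under the same assumptions (the example model, a 1 x 3 x 224 x 224 float input):

#include <openvino/openvino.hpp>

#include <iostream>

int main()
{
    ov::Core core;
    // read the ONNX model and compile it for the CPU plugin
    ov::CompiledModel compiledModel = core.compile_model("best-sim.onnx", "CPU");
    ov::InferRequest inferRequest = compiledModel.create_infer_request();
    // fill the input tensor with the preprocessed CHW float image
    ov::Tensor inputTensor = inferRequest.get_input_tensor();
    float* inputData = inputTensor.data<float>();
    // ... copy the 1 x 3 x 224 x 224 float image into inputData here ...
    inferRequest.infer();
    ov::Tensor outputTensor = inferRequest.get_output_tensor();
    const float* scores = outputTensor.data<float>();
    // argmax over the class scores
    size_t predId = 0;
    for (size_t i = 1; i < outputTensor.get_size(); ++i)
    {
        if (scores[i] > scores[predId])
        {
            predId = i;
        }
    }
    std::cout << "Predicted Label ID: " << predId << std::endl;
}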

 

