ncnn Graph Optimization


Recently, ncnn released a new version, and one of its highlights is graph optimization: the forward graph is rewritten into a simpler structure so that inference runs faster. Let's go through the passes one by one.
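For reference, these passes are exposed through the ncnnoptimize command-line tool that ships with ncnn. As a rough usage sketch (the file names are placeholders and the trailing flag 0 keeps plain fp32 weights; check the tool's usage message of your ncnn build for other options):

ncnnoptimize squeezenet.param squeezenet.bin squeezenet-opt.param squeezenet-opt.bin 0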

  Two consecutive operators can only be merged into one when specific conditions are met. The currently supported fusions fall into four groups:

(1)XXX-batchnorm

int fuse_convolution_batchnorm(); // group1
int fuse_convolutiondepthwise_batchnorm();
int fuse_deconvolution_batchnorm();
int fuse_deconvolutiondepthwise_batchnorm();
int fuse_innerproduct_batchnorm();

 

(2)XXX-activation

int fuse_convolution_activation(); // group2
int fuse_convolutiondepthwise_activation();
int fuse_deconvolution_activation();
int fuse_deconvolutiondepthwise_activation();
int fuse_innerproduct_activation();

 

(3)batchnorm-scale

(4)innerproduct-dropout
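
Groups (3) and (4) follow the same affine-folding idea; roughly (my own notation, not ncnn's), per channel:

batchnorm(x) = b * x + a
scale(x)     = s * x + t
scale(batchnorm(x)) = (s * b) * x + (s * a + t)

so the Scale layer can be absorbed into the BatchNorm by merging the coefficients. Likewise, at inference time a Dropout layer is either an identity or a single multiplicative scale, so it can be absorbed by scaling the InnerProduct weights and bias (or simply dropped when the scale is 1).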

 

  Take conv + batchnorm as the example.
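Before reading the code, it helps to see where the a/b coefficients come from. For one output channel, with conv(x) the convolution output before the batchnorm:

batchnorm(conv(x)) = slope * (conv(x) - mean) / sqrt(var + eps) + bias
                   = b * conv(x) + a
where  b = slope / sqrt(var + eps)
       a = bias - slope * mean / sqrt(var + eps)

Since conv(x) = W·x + bias_conv, multiplying every weight of that output channel by b and replacing the conv bias with b * bias_conv + a yields an equivalent single convolution, which is exactly what the loop at the end of the code segment does.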

int NetOptimize::fuse_convolution_batchnorm(){
  const int layer_count = layers.size();

  // iterate over all layers
  for(int i=0; i<layer_count; i++){
    // look for a Convolution layer
    if(layers[i]->type != "Convolution")
      continue;

    // Convolution - BatchNorm
    int top_blob_index = layers[i]->tops[0];

    int j = i + 1;
    for(; j<layer_count; j++){
      // with this conv fixed, look for a following BatchNorm
      if(layers[j]->type != "BatchNorm")
        continue;
      // the BatchNorm must take exactly one input blob
      if(layers[j]->bottoms.size() != 1)
        continue;
      // check whether this BatchNorm consumes the conv's output blob
      if(layers[j]->bottoms[0] == top_blob_index)
        break;    // found a conv-bn pair
    }
    // boundary condition: no matching BatchNorm, move on to the next layer
    if(j == layer_count)
      continue;

    // fuse "Convolution - BatchNorm" into "Convolution"
    // after the filtering above, <i, j> is a <conv_id, bn_id> pair that can be fused
    ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
    ncnn::BatchNorm* batchnorm = (ncnn::BatchNorm*)layers[j];

    fprintf(stderr, "fuse_convolution_batchnorm %s %s\n", convolution->name.c_str(), batchnorm->name.c_str());

    // =======> code segment begin
    {
      int channels = batchnorm->channels;
      float eps = batchnorm->eps;

      // a = bias - slope * mean / sqrt(var + eps)
      // b = slope / sqrt(var + eps)
      // value = value * b + a
      std::vector<float> a(channels);
      std::vector<float> b(channels);
      // a small gripe about ncnn here: what kind of naming is this?! a and b have zero readability...
      for(int i=0; i<channels; i++){
        float sqrt_var = static_cast<float>(sqrt(batchnorm->var_data[i] + eps));
        a[i] = batchnorm->bias_data[i] - batchnorm->slope_data[i] * batchnorm->mean_data[i] / sqrt_var;
        b[i] = batchnorm->slope_data[i] / sqrt_var;
      }

      if(convolution->bias_term == 0){
        // init bias as zero
        convolution->bias_term = 1;
        convolution->bias_data = ncnn::Mat(channels);
        convolution->bias_data.fill(0.f);
      }

      // number of weights per output channel (stride within weight_data)
      const int weight_per_outch = convolution->weight_data_size / channels;
      float* weight = convolution->weight_data;
      float* bias = convolution->bias_data;
      for(int i=0; i<channels; i++){
        float* conv_weight_outch = weight + weight_per_outch * i;
        for(int j=0; j<weight_per_outch; j++){
          conv_weight_outch[j] *= b[i];    // scale every weight of this output channel by b[i]
        }
        bias[i] = bias[i] * b[i] + a[i];   // the old bias is scaled by b[i] as well, not just offset by a[i]
      }
    }
    // =======> code segment end

    // rewire the layer/blob relationships
    int top_blob_index_final = batchnorm->tops[0];    // record the batchnorm's output blob
    convolution->tops[0] = top_blob_index_final;      // the convolution now produces what used to be the batchnorm's output
    blobs[top_blob_index_final].producer = i;         // that blob's producer is now the conv, no longer the bn
    batchnorm->type = "ncnnfused";                    // mark the original batchnorm layer as fused
  }

  return 0;
}
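
For the activation group (2), finding a <conv, activation> pair works the same way, but instead of rewriting weights the optimizer only records the activation on the convolution layer and retires the activation layer. Below is a rough sketch for the ReLU case only; the activation_type values and the leaky-ReLU handling reflect my reading of ncnn's Convolution layer, so treat them as assumptions (the real pass also covers other activations such as Clip and Sigmoid):

int NetOptimize::fuse_convolution_activation(){
  const int layer_count = layers.size();
  for(int i=0; i<layer_count; i++){
    if(layers[i]->type != "Convolution")
      continue;

    // find a ReLU fed directly by this convolution's output blob
    int top_blob_index = layers[i]->tops[0];
    int j = i + 1;
    for(; j<layer_count; j++){
      if(layers[j]->type != "ReLU")
        continue;
      if(layers[j]->bottoms.size() != 1)
        continue;
      if(layers[j]->bottoms[0] == top_blob_index)
        break;
    }
    if(j == layer_count)
      continue;

    ncnn::Convolution* convolution = (ncnn::Convolution*)layers[i];
    ncnn::ReLU* relu = (ncnn::ReLU*)layers[j];

    // record the activation on the convolution instead of folding weights
    if(relu->slope == 0.f){
      convolution->activation_type = 1;    // plain ReLU (value assumed)
    }else{
      convolution->activation_type = 2;    // leaky ReLU (value assumed)
      convolution->activation_params = ncnn::Mat(1);
      convolution->activation_params[0] = relu->slope;
    }

    // rewire blobs and retire the activation layer, same as the batchnorm case
    int top_blob_index_final = relu->tops[0];
    convolution->tops[0] = top_blob_index_final;
    blobs[top_blob_index_final].producer = i;
    relu->type = "ncnnfused";
  }

  return 0;
}

The win here is not arithmetic folding but removing a whole layer and one intermediate blob from the graph: the convolution applies the activation in place instead of writing its output out and reading it back.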

 

