MNN's 8-bit quantization supports two methods: KL divergence and ADMM. This article covers the KL-divergence-based 8-bit quantization.
MNN build and run commands
Build:
cd MNN
mkdir build
cd build
cmake -DMNN_BUILD_QUANTOOLS=ON ..
make -j4
Run:
./quantized.out origin.mnn quan.mnn preprocessConfig.json
Configuration parameters:
{ "format":"RGB", "mean":[ 127.5, 127.5, 127.5 ], "normal":[ 0.00784314, 0.00784314, 0.00784314 ], "width":224, "height":224, "path":"path/to/images/", "used_image_num":500, "feature_quantize_method":"KL", "weight_quantize_method":"MAX_ABS" }
The default feature quantization method is KL divergence.
How MNN quantization works:
Borrowing a few figures from TensorRT to illustrate: converting FP32 to INT8 means representing a tensor with 8 bits instead of 32 bits. The simplest way to do this is linear quantization:
FP32 Tensor (T) = scale_factor(sf) * 8-bit Tensor(t) + FP32_bias (b)
Experiments show that the bias is not actually needed, so it is dropped:
T = sf * t
Here sf is the scaling factor of each tensor in each layer. In practice, MNN computes a separate scaling factor for every channel of a convolution.
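To make the formula concrete, here is a minimal NumPy sketch of symmetric per-channel linear quantization. The function names are made up for illustration and are not MNN APIs:

import numpy as np

# Minimal sketch of symmetric linear quantization, T ≈ sf * t.
# quantize_per_channel / dequantize are illustrative names, not MNN functions.
def quantize_per_channel(weights):
    # weights: (out_channels, ...) float32 tensor
    flat = weights.reshape(weights.shape[0], -1)
    sf = np.max(np.abs(flat), axis=1) / 127.0            # one scale per output channel
    q = np.clip(np.round(flat / sf[:, None]), -127, 127).astype(np.int8)
    return q.reshape(weights.shape), sf

def dequantize(q, sf):
    return q.reshape(q.shape[0], -1) * sf[:, None]        # T = sf * t

w = np.random.standard_normal((8, 3, 3, 3)).astype(np.float32)
qw, sf = quantize_per_channel(w)
print(np.abs(dequantize(qw, sf).reshape(w.shape) - w).max())  # quantization error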
The simplest mapping takes the -|max| and |max| values of a tensor to -127 and 127 and maps everything in between linearly. This mapping is unsaturated, and such a naive mapping can cause a large loss of accuracy. MNN and TensorRT instead do the following:
Rather than mapping |max| to 127, a threshold |T| is chosen: ±|T| is mapped to ±127, and values beyond ±|T| are clipped to ±127. The common practice is to pick the threshold T that minimizes the KL divergence.
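For intuition, here is a small sketch contrasting the two mappings: the unsaturated one derives the scale from |max|, while the saturated one clips at a threshold T before scaling. The threshold here is chosen by hand purely for illustration; MNN and TensorRT search for it with the KL criterion described below.

import numpy as np

x = np.random.standard_normal(10000) * 2.0   # toy activation values

# Unsaturated mapping: |max| -> 127.
sf_max = np.max(np.abs(x)) / 127.0
q_max = np.round(x / sf_max).astype(np.int8)

# Saturated mapping: values beyond ±T are clipped to ±127 (T picked by hand here).
T = 3.0
sf_T = T / 127.0
q_T = np.clip(np.round(x / sf_T), -127, 127).astype(np.int8)

# Compare reconstruction error of the two mappings.
print(np.mean((q_max * sf_max - x) ** 2), np.mean((q_T * sf_T - x) ** 2))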
1. What is KL divergence
KL (Kullback-Leibler) divergence measures how one probability distribution differs from another. It is widely used in probability theory and information theory. In information theory, D(P||Q) is the information lost when the distribution Q is used to approximate the true distribution P, where P is the true distribution and Q is its approximation.
Definition of KL divergence:
D(P||Q) = Σ_i P(i) * log( P(i) / Q(i) )
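A tiny numeric example (illustrative only), computing D(P||Q) for two hand-made distributions with NumPy:

import numpy as np

P = np.array([0.1, 0.4, 0.5])      # "true" distribution
Q = np.array([0.25, 0.25, 0.5])    # approximation of P
kl = np.sum(P * np.log(P / Q))     # D(P||Q)
print(kl)                          # ≈ 0.096; it is 0 only when P == Q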
Python sample code:
import numpy as np


def get_distribution(P):
    # Histogram the absolute values of P into 2048 bins covering [0, max(|P|)].
    pmax = np.max(np.fabs(P))
    distribution = np.zeros(2048)
    interval = 2048 / pmax
    for v in P:
        index = int(np.fabs(v) * interval)
        if index >= 2048:
            index = 2047
        distribution[index] += 1
    return distribution


def kl_divergence(P, Q, length):
    KL = 0.0
    for i in range(length):
        if P[i] == 0.0:
            continue
        if Q[i] == 0.0:
            KL += 1  # penalize bins that Q leaves empty but P does not
        else:
            KL += P[i] * np.log(P[i] / Q[i])
    return KL


def test():
    P = np.random.standard_normal(96 * 3 * 11 * 11)
    Pdistribution = get_distribution(P)
    kl = np.inf

    for k in range(128, 2048):
        # Reference distribution: the first k bins, with the tail folded into the last bin.
        reference_distribution = Pdistribution[:k].copy()
        reference_distribution[k - 1] += sum(Pdistribution[k:])
        interval = k / 128.0

        # Merge the k bins into 128 bins (quantization).
        quantized_distribution = np.zeros(128)
        for i in range(128):
            start = i * interval
            end = (i + 1) * interval
            leftupper = int(np.ceil(start))
            if leftupper > start:
                quantized_distribution[i] += (leftupper - start) * Pdistribution[leftupper - 1]
            rightlower = int(np.floor(end))
            if rightlower < end:
                quantized_distribution[i] += (end - rightlower) * Pdistribution[rightlower]
            quantized_distribution[i] += sum(Pdistribution[leftupper:rightlower])

        # Expand the 128 bins back to k bins so they can be compared with the reference.
        expand_distribution = np.zeros(k)
        for i in range(128):
            start = i * interval
            end = (i + 1) * interval
            leftupper = int(np.ceil(start))
            rightlower = int(np.floor(end))
            count = 0.0
            if leftupper > start and Pdistribution[leftupper - 1] != 0:
                count += leftupper - start
            if rightlower < end and Pdistribution[rightlower] != 0:
                count += end - rightlower
            count += np.count_nonzero(Pdistribution[leftupper:rightlower])
            if count == 0:
                continue
            expandvalue = quantized_distribution[i] / count
            if leftupper > start and Pdistribution[leftupper - 1] != 0:
                expand_distribution[leftupper - 1] += expandvalue * (leftupper - start)
            if rightlower < end and Pdistribution[rightlower] != 0:
                expand_distribution[rightlower] += expandvalue * (end - rightlower)
            for j in range(leftupper, rightlower):
                if Pdistribution[j] != 0:
                    expand_distribution[j] += expandvalue

        tempkl = kl_divergence(reference_distribution, expand_distribution, k)
        if tempkl < kl:
            kl = tempkl
            print('kl: {}, threshold index: {}'.format(kl, k))


if __name__ == "__main__":
    test()
In the quantization process for deep learning, the true distribution P of each tensor is histogrammed into 2048 bins, and Q, the int8 distribution over [0, 127], is used to approximate P.
How MNN computes the KL divergence
1. Obtain the true distribution P:
As the quantization command shows, 500 images are used to approximate the real data distribution: they are run through forward passes to collect the distribution of every layer. The entry point is in Calibration.cpp:
void Calibration::_computeFeatureScaleKL() {
    _computeFeatureMapsRange();
    _collectFeatureMapsDistribution();

    _scales.clear();
    for (auto& iter : _featureInfo) {
        AUTOTIME;
        _scales[iter.first] = iter.second->finishAndCompute();
    }
    //_featureInfo.clear();//No need now
}
_computeFeatureMapsRange records, for every channel of every convolution layer, the minimum and maximum values seen during the forward passes. _collectFeatureMapsDistribution then uses each channel's maximum to build a 2048-bin histogram of that channel's values.
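A rough Python sketch of what this two-pass collection conceptually does (shapes and function names are assumptions for illustration, not the actual MNN code): first track each channel's |max| over all calibration images, then, in a second pass, histogram each channel's absolute values into 2048 bins using that max.

import numpy as np

BIN_NUMBER = 2048

# Pass 1 over all calibration images: track each channel's max |value|.
def update_range(feature, channel_max):
    # feature: one layer's feature map, assumed shape (C, H, W)
    flat = np.abs(feature).reshape(feature.shape[0], -1)
    channel_max[:] = np.maximum(channel_max, flat.max(axis=1))

# Pass 2: accumulate a 2048-bin histogram of |values| for each channel.
def update_distribution(feature, channel_max, histograms):
    for c in range(feature.shape[0]):
        if channel_max[c] == 0:
            continue
        interval = BIN_NUMBER / channel_max[c]
        idx = np.minimum((np.abs(feature[c]) * interval).astype(int), BIN_NUMBER - 1)
        np.add.at(histograms[c], idx.ravel(), 1)

# Usage: channel_max = np.zeros(C); histograms = np.zeros((C, BIN_NUMBER))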
_computeThreshold, called from finishAndCompute, searches for the threshold T that minimizes the KL divergence:
int TensorStatistic::_computeThreshold(const std::vector<float>& distribution) {
    const int targetBinNums = 128;
    int threshold           = targetBinNums;

    if (mThresholdMethod == THRESHOLD_KL) {
        float minKLDivergence   = 10000.0f;
        float afterThresholdSum = 0.0f;
        std::for_each(distribution.begin() + targetBinNums, distribution.end(),
                      [&](float n) { afterThresholdSum += n; });
        for (int i = targetBinNums; i < mBinNumber; ++i) {
            std::vector<float> quantizedDistribution(targetBinNums);
            std::vector<float> candidateDistribution(i);
            std::vector<float> expandedDistribution(i);
            std::copy(distribution.begin(), distribution.begin() + i, candidateDistribution.begin());
            candidateDistribution[i - 1] += afterThresholdSum;
            afterThresholdSum -= distribution[i];

            const float binInterval = (float)i / (float)targetBinNums;

            // merge i bins to target bins
            for (int j = 0; j < targetBinNums; ++j) {
                const float start   = j * binInterval;
                const float end     = start + binInterval;
                const int leftUpper = static_cast<int>(std::ceil(start));
                if (leftUpper > start) {
                    const float leftScale = leftUpper - start;
                    quantizedDistribution[j] += leftScale * distribution[leftUpper - 1];
                }
                const int rightLower = static_cast<int>(std::floor(end));
                if (rightLower < end) {
                    const float rightScale = end - rightLower;
                    quantizedDistribution[j] += rightScale * distribution[rightLower];
                }
                std::for_each(distribution.begin() + leftUpper, distribution.begin() + rightLower,
                              [&](float n) { quantizedDistribution[j] += n; });
            }
            // expand target bins to i bins
            for (int j = 0; j < targetBinNums; ++j) {
                const float start   = j * binInterval;
                const float end     = start + binInterval;
                float count         = 0;
                const int leftUpper = static_cast<int>(std::ceil(start));
                float leftScale     = 0.0f;
                if (leftUpper > start) {
                    leftScale = leftUpper - start;
                    if (distribution[leftUpper - 1] != 0) {
                        count += leftScale;
                    }
                }
                const int rightLower = static_cast<int>(std::floor(end));
                float rightScale     = 0.0f;
                if (rightLower < end) {
                    rightScale = end - rightLower;
                    if (distribution[rightLower] != 0) {
                        count += rightScale;
                    }
                }
                std::for_each(distribution.begin() + leftUpper, distribution.begin() + rightLower, [&](float n) {
                    if (n != 0) {
                        count += 1;
                    }
                });
                if (count == 0) {
                    continue;
                }
                const float toExpandValue = quantizedDistribution[j] / count;
                if (leftUpper > start && distribution[leftUpper - 1] != 0) {
                    expandedDistribution[leftUpper - 1] += toExpandValue * leftScale;
                }
                if (rightLower < end && distribution[rightLower] != 0) {
                    expandedDistribution[rightLower] += toExpandValue * rightScale;
                }
                for (int k = leftUpper; k < rightLower; ++k) {
                    if (distribution[k] != 0) {
                        expandedDistribution[k] += toExpandValue;
                    }
                }
            }
            const float curKL = _klDivergence(candidateDistribution, expandedDistribution);
            // std::cout << "=====> KL: " << i << " ==> " << curKL << std::endl;
            if (curKL < minKLDivergence) {
                minKLDivergence = curKL;
                threshold       = i;
            }
        }
    } else if (mThresholdMethod == THRESHOLD_MAX) {
        threshold = mBinNumber - 1;
    } else {
        // TODO, support other method
        MNN_ASSERT(false);
    }
    return threshold;
}
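The value returned by _computeThreshold is a bin index; the remaining step is turning it into the per-channel quantization scale. A hedged sketch of that conversion follows (the exact formula MNN uses, e.g. the half-bin offset, is an assumption here, and threshold_to_scale is an illustrative name):

def threshold_to_scale(threshold, channel_max, bin_number=2048):
    # threshold: bin index chosen by the KL search; channel_max: the channel's |max|
    bin_width = channel_max / bin_number
    T = (threshold + 0.5) * bin_width   # FP32 value corresponding to the chosen bin
    return T / 127.0                    # so that FP32 value ≈ scale * int8 value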