模型量化的本質,就是將模型中的參數按照一定的規則,把32位或者64位浮點數轉化為16位浮點數或者8位定點數。這裡我用keras和numpy實現了16位和8位的量化,未考慮量化的科學合理性,僅僅是為了搞清楚量化本質而做的一次實驗。
# Quantization script
"""
#coding:utf-8
__project_ = 'TF2learning'
__file_name__ = 'quantization'
__author__ = 'qilibin'
__time__ = '2021/3/17 9:18'
__product_name = PyCharm
"""
import h5py
import pandas as pd
import numpy as np

# Read an existing weights-only H5 model, walk it layer by layer, quantize
# every weight of every layer to 16 or 8 bits, and save the quantized values
# into a new H5 file.


def _quantize_array(param, bit_num):
    """Return ``param`` quantized to ``bit_num`` bits.

    :param param: numpy array holding the original weight values
    :param bit_num: 16 (cast to float16) or 8 (min-max scaling to uint8)
    :return: a new numpy array of dtype float16 or uint8
    """
    if bit_num == 16:
        return param.astype(np.float16)

    # 8-bit: linearly map [min, max] onto the integer range [0, 255].
    if param.size == 0:
        # np.min / np.max raise on empty input; there is nothing to scale.
        return param.astype(np.uint8)
    min_val = np.min(param)
    max_val = np.max(param)
    span = max_val - min_val
    if span == 0:
        # Constant tensor: dividing by a zero range would yield NaN.
        # Every element sits at the minimum, which maps to 0.
        return np.zeros(param.shape, dtype=np.uint8)
    return np.round((param - min_val) / span * 255).astype(np.uint8)


def quantization16bit(old_model_path, new_model_path, bit_num):
    """Quantize every weight of an H5 weights file and write a new H5 file.

    The input is expected to be laid out as ``file[layer][layer][weight]``.
    Layers whose group is empty (layers without parameters, e.g. relu) are
    still created in the output, just without a nested weight group.

    Note that for 8-bit output the per-tensor minimum and maximum are not
    stored, so the original value range cannot be recovered from the new file.

    :param old_model_path: path of the un-quantized model; the file holds
        weights only, not the network structure
    :param new_model_path: path the quantized model is written to
        (opened in ``'w'`` mode, so an existing file is overwritten)
    :param bit_num: number of bits to quantize to, 16 or 8
    :return: None
    :raises ValueError: if ``bit_num`` is neither 16 nor 8
    """
    # Validate before opening anything so a bad argument cannot truncate
    # an existing output file.
    if bit_num not in (8, 16):
        raise ValueError("bit_num must be 8 or 16, got %r" % (bit_num,))

    # Context managers close both files even if a layer fails half-way.
    with h5py.File(old_model_path, 'r') as f, \
            h5py.File(new_model_path, 'w') as f2:
        for layer in f.keys():
            # layer: name of the layer
            print(layer)
            g1 = f2.create_group(layer)
            # NOTE(review): this stores the layer name itself, not a list of
            # weight names - confirm what downstream loaders expect here.
            g1.attrs["weight_names"] = layer

            # Some layers have no parameters (e.g. relu). The original author
            # considered reading f[layer].attrs['weight_names'] instead;
            # an empty layer group is used as the signal here.
            if len(f[layer]) == 0:
                continue

            g2 = g1.create_group(layer)
            for weight in f[layer][layer].keys():
                print("wieght name is :" + weight)
                # [()] reads the whole dataset and, unlike [:], also works
                # for scalar datasets; asarray normalizes scalars to 0-d arrays.
                oldparam = np.asarray(f[layer][layer][weight][()])
                print('-----------------------------------------old-----------------------')
                print(oldparam)

                newparam = _quantize_array(oldparam, bit_num)
                print('-----------------------------------------new-----------------------')
                # print(newparam)

                # Assigning into the datasets of the source file did not work
                # (per the original author), so a fresh dataset is created in
                # the new file instead.
                g2.create_dataset(weight, data=newparam, dtype=newparam.dtype)


old_model_path = './model_0_.h5'
new_model_path = './new_model.h5'
quantization16bit(old_model_path, new_model_path, 8)
# print (f['batch_normalization']['batch_normalization']['gamma:0'][:])
檢查量化後的文件
""" #coding:utf-8 __project_ = 'TF2learning' __file_name__ = 'readNewMoDel' __author__ = 'qilibin' __time__ = '2021/3/17 13:27' __product_name = PyCharm """ ''' 用來打印量化之后的模型 查看其各個權重的參數 ''' import h5py modelpath = './new_model.h5' #modelpath = './model_0_.h5' f = h5py.File(modelpath,'r') for layer in f.keys(): # key : 層的名稱 print ("layer name is :"+layer) # 有些層是沒有參數的 比如relu length = len(list(f[layer].keys())) #print (length) if length > 0: for weight in f[layer][layer].keys(): print("wieght name is :" + weight) param = f[layer][layer][weight][:] print(param) f.close() # print (f['batch_normalization']['batch_normalization']['gamma:0'][:])
