YOLOv5 v4.0 has been out for a while now. The release notes summarize the changes as follows:
nn.SiLU() activations replace nn.LeakyReLU(0.1) and nn.Hardswish() activations throughout the model, simplifying the architecture as we now only have one single activation function used everywhere rather than the two types before.
In general the changes result in smaller models (89.0M params -> 87.7M YOLOv5x), faster inference times (6.9ms -> 6.0ms), and improved mAP (49.2 -> 50.1) for all models except YOLOv5s, which reduced mAP slightly (37.0 -> 36.8). In general the largest models benefit the most from this update. YOLOv5x in particular is now above 50.0 mAP at --img-size 640, which may be the first time this is possible at 640 resolution for any architecture I’m aware of (correct me if I’m wrong though).
In short, SiLU replaces both LeakyReLU and Hardswish, simplifying the network structure. My own trained checkpoint is still large, though: training on 26,771 samples across 15 classes for 109 epochs produced a 667.2 MB weights file, which is clearly quite big.
So the model needs to be compressed by exporting FP16 weights, taking care to rebuild the network with the same activation function it was trained with. The full export script is as follows:
import os
import torch
import torch.nn as nn
def autopad(k, p=None):
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p
class Conv(nn.Module):
    # Standard convolution: conv + batchnorm + activation
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        super(Conv, self).__init__()
        self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        self.act = nn.SiLU() if act else nn.Identity()  # must match the activation used at training time

    def forward(self, x):
        return self.act(self.bn(self.conv(x)))

    def fuseforward(self, x):
        return self.act(self.conv(x))
class Ensemble(nn.ModuleList):
    # Ensemble of models
    def __init__(self):
        super(Ensemble, self).__init__()

    def forward(self, x, augment=False):
        y = []
        for module in self:
            y.append(module(x, augment)[0])
        y = torch.cat(y, 1)
        return y, None
def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        # attempt_download(w)
        model.append(torch.load(w, map_location=map_location)['model'].float().fuse().eval())  # load FP32 model
    for m in model.modules():
        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
            m.inplace = True  # pytorch 1.7.0 compatibility
        elif type(m) is Conv:
            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
    if len(model) == 1:
        return model[-1]  # return model
    else:
        print('Ensemble created with %s\n' % weights)
        for k in ['names', 'stride']:
            setattr(model, k, getattr(model[-1], k))
        return model  # return ensemble
def select_device(device='', batch_size=None):
    # device = 'cpu' or '0' or '0,1,2,3'
    cpu_request = device.lower() == 'cpu'
    if device and not cpu_request:  # a specific GPU was requested
        os.environ['CUDA_VISIBLE_DEVICES'] = device  # set environment variable
        assert torch.cuda.is_available(), 'CUDA unavailable, invalid device %s requested' % device  # check availability
    cuda = False if cpu_request else torch.cuda.is_available()
    if cuda:
        ng = torch.cuda.device_count()
        if ng > 1 and batch_size:  # check that batch_size is compatible with device_count
            assert batch_size % ng == 0, 'batch-size %g not multiple of GPU count %g' % (batch_size, ng)
    return torch.device('cuda:0' if cuda else 'cpu')
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--in_weights', type=str, default='./last.pt', help='initial weights path')
    parser.add_argument('--out_weights', type=str, default='quantification.pt', help='output weights path')
    parser.add_argument('--device', type=str, default='0', help='device')
    opt = parser.parse_args()

    device = select_device(opt.device)
    model = attempt_load(opt.in_weights, map_location=device)  # load and fuse the FP32 checkpoint
    model.to(device).eval()
    model.half()  # convert all weights to FP16
    torch.save(model, opt.out_weights)  # saves the bare model object, not a {'model': ...} dict
    print('done.')
    print('-[INFO] before: {:.1f} MB, after: {:.1f} MB'.format(
        os.path.getsize(opt.in_weights) / 1024 ** 2,
        os.path.getsize(opt.out_weights) / 1024 ** 2))
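Assuming the script above is saved as, say, quantify.py (the filename is arbitrary), it can be run like this:

python quantify.py --in_weights ./last.pt --out_weights quantification.pt --device 0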
With version 4.0 and self.act = nn.SiLU(), FP16 export shrinks the model from 667.2 MB to 166 MB; running it through the official detect.py, inference takes 0.460 s.

Inference with the original (unquantized) model takes 0.553 s.

With version 4.0 and self.act = nn.Hardswish(), FP16 export likewise shrinks the model from 667.2 MB to 166 MB; with the official detect.py, inference takes 0.415 s.
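The only difference between the two runs above is the activation constructed in the export script's Conv class, which has to mirror whatever the checkpoint was trained with; a mismatched activation will silently change the network's outputs. For a pre-4.0, Hardswish-based checkpoint the line would read:

self.act = nn.Hardswish() if act else nn.Identity()  # instead of nn.SiLU()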

Caveat:
You also need to modify models/experimental.py inside the project's models folder, otherwise loading the exported weights will raise an error. The reason is that the script above saves the bare model object, while the stock attempt_load expects a checkpoint dict and indexes it with ['model']; drop that indexing as shown here:
def attempt_load(weights, map_location=None):
    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
    model = Ensemble()
    for w in weights if isinstance(weights, list) else [weights]:
        attempt_download(w)
        model.append(torch.load(w, map_location=map_location).float().fuse().eval())  # no ['model'] indexing
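As a quick sanity check (a minimal sketch; it assumes the default output name quantification.pt, a CUDA device, and that it is run from the yolov5 repo root so the pickled module classes can be resolved), the exported FP16 model can be loaded directly and run on a dummy input:

import torch

model = torch.load('quantification.pt', map_location='cuda:0')  # bare model object, not a checkpoint dict
model.eval()
img = torch.zeros(1, 3, 640, 640, device='cuda:0').half()  # input dtype must match the FP16 weights
with torch.no_grad():
    pred = model(img)[0]
print(pred.shape, pred.dtype)  # expect torch.float16 predictions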