說明:
本例程使用YOLOv3進行昆蟲檢測。例程分為數據處理、模型設計、損失函數、訓練模型、模型預測和測試模型六個部分。本篇為第二部分,使用Paddle動態圖實現了YOLOv3,使用Darknet53骨幹網絡和YOLOv3的檢測頭部。
實驗代碼:
Darknet53骨幹網絡和YOLOv3頭部:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.model import DarkNet53, YOLOHeader

# Demo: run one random image through the DarkNet53 backbone and through three
# stand-alone detection heads, then print the shape of every output.
with fluid.dygraph.guard():
    # Input: a single random 3-channel 608x608 image, converted to a variable.
    image = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    # The backbone is built first, then one head per backbone output.
    # Each head is given half as many filters as it has input channels.
    backbone = DarkNet53()
    heads = [
        YOLOHeader(num_channels=width, num_filters=width // 2)
        for width in (1024, 512, 256)
    ]

    # Forward pass: backbone features, then (route, tip) from each head.
    c0, c1, c2 = backbone(image)
    features = (c0, c1, c2)
    head_outputs = [head(feature) for head, feature in zip(heads, features)]

    # Labels are kept exactly as printed by the original script
    # (the c1/c2 rows carry a leading space in ' route:').
    labels = (('c0:', 'route:'), ('c1:', ' route:'), ('c2:', ' route:'))
    for (name, route_label), feature, (route, tip) in zip(labels, features, head_outputs):
        print(name, feature.shape, route_label, route.shape, 'tip:', tip.shape)
結果:
c0: [1, 1024, 19, 19] route: [1, 512, 19, 19] tip: [1, 1024, 19, 19]
c1: [1, 512, 38, 38] route: [1, 256, 38, 38] tip: [1, 512, 38, 38]
c2: [1, 256, 76, 76] route: [1, 128, 76, 76] tip: [1, 256, 76, 76]
完整的YOLOv3模型:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.model import YOLOv3

# Demo: build the full YOLOv3 model and print the shape of its three outputs.
with fluid.dygraph.guard():
    # Input: a single random 3-channel 608x608 image, converted to a variable.
    image = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    num_classes = 7                                   # number of classes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]   # anchor indices per output

    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)
    p0, p1, p2 = model(image)

    # One line per output, in the order the model returns them.
    for label, prediction in (('p0:', p0), ('p1:', p1), ('p2:', p2)):
        print(label, prediction.shape)
結果:
p0: [1, 36, 19, 19]
p1: [1, 36, 38, 38]
p2: [1, 36, 76, 76]
每個YOLOv3頭部的輸出特徵:
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable

from source.model import YOLOv3

# Demo: split the first YOLOv3 output into its location, objectness and
# class parts and print the shape of each.
with fluid.dygraph.guard():
    # Input: a single random 3-channel 608x608 image, converted to a variable.
    image = to_variable(np.random.randn(1, 3, 608, 608).astype(np.float32))

    num_classes = 7                                   # number of classes
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]   # anchor indices per output

    model = YOLOv3(num_classes=num_classes, anchor_mask=anchor_mask)
    p0, p1, p2 = model(image)

    # Separate the channel axis into (anchors, 5 + num_classes) while keeping
    # the spatial size of p0.
    anchors_here = len(anchor_mask[0])
    per_anchor = 5 + num_classes
    grid_h, grid_w = p0.shape[2], p0.shape[3]
    p0 = fluid.layers.reshape(p0, [-1, anchors_here, per_anchor, grid_h, grid_w])

    # Per-anchor layout: entries 0..3 location, entry 4 objectness, rest classes.
    pdloc = p0[:, :, 0:4, :, :]                                       # location terms (no activation applied)
    pdobj = fluid.layers.sigmoid(p0[:, :, 4, :, :])                   # objectness probability
    pdcls = fluid.layers.sigmoid(p0[:, :, 5:5 + num_classes, :, :])   # class probabilities

    print('predict_loc', pdloc.shape)
    print('predict_obj', pdobj.shape)
    print('predict_cls', pdcls.shape)
結果:
predict_loc [1, 3, 4, 19, 19]
predict_obj [1, 3, 19, 19]
predict_cls [1, 3, 7, 19, 19]
model.py文件:
import paddle.fluid as fluid
from paddle.fluid.dygraph.nn import Conv2D, BatchNorm
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay


class ConvBN(fluid.dygraph.Layer):
    """Bias-free convolution, then batch normalization, then leaky ReLU."""

    def __init__(self, num_channels, num_filters, filter_size, stride, padding):
        super(ConvBN, self).__init__()
        # Convolution weights are drawn from Normal(0, 0.02); there is no bias
        # term and no activation inside the convolution itself.
        conv_weight_attr = ParamAttr(initializer=fluid.initializer.Normal(0, 0.02))
        self.conv = Conv2D(
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=stride,
            padding=padding,
            param_attr=conv_weight_attr,
            bias_attr=False,
            act=None)

        # Batch-norm scale: Normal(0, 0.02); shift: constant 0.
        # Both are given an L2 decay coefficient of 0.
        bn_scale_attr = ParamAttr(
            initializer=fluid.initializer.Normal(0, 0.02),
            regularizer=L2Decay(0))
        bn_shift_attr = ParamAttr(
            initializer=fluid.initializer.Constant(0),
            regularizer=L2Decay(0))
        self.batch_norm = BatchNorm(
            num_channels=num_filters,
            param_attr=bn_scale_attr,
            bias_attr=bn_shift_attr,
            act=None)

    def forward(self, x):
        """Apply conv -> batch norm -> leaky ReLU (alpha 0.1) to ``x``."""
        normalized = self.batch_norm(self.conv(x))
        return fluid.layers.leaky_relu(x=normalized, alpha=0.1)


class DownSample(fluid.dygraph.Layer):
    """A single ``ConvBN``; the defaults (3x3 kernel, stride 2, padding 1) downsample."""

    def __init__(self, num_channels, num_filters, filter_size=3, stride=2, padding=1):
        super(DownSample, self).__init__()
        self.conv_bn = ConvBN(num_channels, num_filters, filter_size, stride, padding)

    def forward(self, x):
        """Return ``x`` passed through the wrapped ``ConvBN``."""
        return self.conv_bn(x)


class UpSample(fluid.dygraph.Layer):
    """Nearest-neighbour resize of the two trailing (spatial) axes by ``scale``."""

    def __init__(self, scale=2):
        super(UpSample, self).__init__()
        self.scale = scale

    def forward(self, x):
        """Resize ``x`` so that axes 2 and 3 are ``scale`` times larger."""
        # Read axes 2 and 3 from the runtime shape, as int32, with gradient
        # tracking switched off for that size tensor.
        runtime_shape = fluid.layers.shape(input=x)
        spatial = fluid.layers.slice(input=runtime_shape, axes=[0], starts=[2], ends=[4])
        spatial = fluid.layers.cast(x=spatial, dtype='int32')
        spatial.stop_gradient = True

        # Target size is the current size multiplied by the scale factor.
        target = spatial * self.scale
        return fluid.layers.resize_nearest(input=x, out_shape=target, scale=self.scale)


class BasicBlock(fluid.dygraph.Layer):
    """Residual unit: 1x1 ``ConvBN`` then 3x3 ``ConvBN``, added back onto the input.

    The second convolution emits ``num_filters * 2`` channels, so the addition
    only lines up when ``num_channels == num_filters * 2``.
    """

    def __init__(self, num_channels, num_filters):
        super(BasicBlock, self).__init__()
        self.conv_bn_1 = ConvBN(
            num_channels=num_channels, num_filters=num_filters,
            filter_size=1, stride=1, padding=0)
        self.conv_bn_2 = ConvBN(
            num_channels=num_filters, num_filters=num_filters * 2,
            filter_size=3, stride=1, padding=1)

    def forward(self, x):
        """Return ``x + conv_bn_2(conv_bn_1(x))``."""
        branch = self.conv_bn_2(self.conv_bn_1(x))
        return fluid.layers.elementwise_add(x=x, y=branch, act=None)


class BlockGroup(fluid.dygraph.Layer):
    """``num_blocks`` ``BasicBlock`` units applied one after another."""

    def __init__(self, num_channels, num_filters, num_blocks):
        super(BlockGroup, self).__init__()
        # First residual unit, held as a normal attribute.
        self.basicblock_0 = BasicBlock(num_channels=num_channels, num_filters=num_filters)

        # Remaining units, registered as 'block_1', 'block_2', ...
        self.block_list = [
            self.add_sublayer(
                'block_{}'.format(idx),
                BasicBlock(num_channels=num_channels, num_filters=num_filters))
            for idx in range(1, num_blocks)
        ]

    def forward(self, x):
        """Run ``x`` through the first unit and then every remaining unit in order."""
        out = self.basicblock_0(x)
        for block in self.block_list:
            out = block(out)
        return out


class DarkNet53(fluid.dygraph.Layer):
    """Backbone: stem convolution, one downsample, then five residual groups.

    ``forward`` returns the outputs of the last three groups, deepest first.
    """

    def __init__(self):
        super(DarkNet53, self).__init__()
        # Stem: 3 -> 32 channels at stride 1, then downsample 32 -> 64.
        self.conv_bn_1 = ConvBN(num_channels=3, num_filters=32, filter_size=3, stride=1, padding=1)
        self.down_sample_1 = DownSample(num_channels=32, num_filters=64)

        # Residual groups: group i takes 64 * 2**i channels and uses
        # 32 * 2**i filters inside each block.
        self.num_groups = [1, 2, 8, 8, 4]  # number of blocks per group
        self.group_list = [
            self.add_sublayer(
                'group_{}'.format(idx),
                BlockGroup(
                    num_channels=64 * 2 ** idx,
                    num_filters=32 * 2 ** idx,
                    num_blocks=block_count))
            for idx, block_count in enumerate(self.num_groups)
        ]

        # One downsample between consecutive groups; each doubles the channels.
        self.downs_list = [
            self.add_sublayer(
                'downs_{}'.format(idx),
                DownSample(num_channels=64 * 2 ** idx, num_filters=128 * 2 ** idx))
            for idx in range(len(self.num_groups) - 1)
        ]

    def forward(self, x):
        """Return the last three group outputs as a list, deepest first (c0, c1, c2)."""
        x = self.down_sample_1(self.conv_bn_1(x))

        c_list = []  # output of every group, shallowest first
        final_idx = len(self.num_groups) - 1
        for idx, group in enumerate(self.group_list):
            x = group(x)
            c_list.append(x)
            # Downsample before every group except after the final one.
            if idx < final_idx:
                x = self.downs_list[idx](x)

        # Last three entries, reversed.
        return c_list[-3:][::-1]


class YOLOHeader(fluid.dygraph.Layer):
    """Detection head: four alternating 1x1 / 3x3 ``ConvBN`` layers, then route and tip.

    ``route`` has ``num_filters`` channels and ``tip`` has ``num_filters * 2``.
    """

    def __init__(self, num_channels, num_filters):
        super(YOLOHeader, self).__init__()
        assert num_filters % 2 == 0, "num_filters {} cannot be devided by 2".format(num_filters)

        def squeeze(in_channels):
            # 1x1 ConvBN that outputs num_filters channels.
            return ConvBN(
                num_channels=in_channels, num_filters=num_filters,
                filter_size=1, stride=1, padding=0)

        def expand():
            # 3x3 ConvBN from num_filters to num_filters * 2 channels.
            return ConvBN(
                num_channels=num_filters, num_filters=num_filters * 2,
                filter_size=3, stride=1, padding=1)

        # Layers are created in the same order as they are applied.
        self.conv_bn_1 = squeeze(num_channels)
        self.conv_bn_2 = expand()
        self.conv_bn_3 = squeeze(num_filters * 2)
        self.conv_bn_4 = expand()
        self.route = squeeze(num_filters * 2)
        self.tip = expand()

    def forward(self, x):
        """Return ``(route, tip)``, where ``tip`` is computed from ``route``."""
        for layer in (self.conv_bn_1, self.conv_bn_2, self.conv_bn_3, self.conv_bn_4):
            x = layer(x)
        route = self.route(x)
        return route, self.tip(route)


class YOLOv3(fluid.dygraph.Layer):
    """DarkNet53 backbone plus one detection head and output convolution per anchor group.

    Each output has ``len(anchor_mask[i]) * (num_classes + 5)`` channels.
    """

    def __init__(self, num_classes, anchor_mask):
        super(YOLOv3, self).__init__()
        self.backbone = DarkNet53()

        self.num_classes = num_classes  # number of classes
        self.anchor_mask = anchor_mask  # anchor indices for each output
        self.dete_list = []             # detection heads
        self.conv_list = []             # output convolutions
        self.rout_list = []             # route convolutions feeding the next head

        last_head = len(self.anchor_mask) - 1
        for idx, mask in enumerate(self.anchor_mask):
            divisor = 2 ** idx
            tip_channels = 1024 // divisor   # channels of this head's tip
            head_filters = 512 // divisor    # channels of this head's route

            # Detection head. Head 0 sees the backbone feature alone; later
            # heads see it concatenated with the upsampled route of the
            # previous head.
            if idx == 0:
                head_inputs = tip_channels
            else:
                head_inputs = tip_channels + head_filters
            dete_item = self.add_sublayer(
                'dete_{}'.format(idx),
                YOLOHeader(num_channels=head_inputs, num_filters=head_filters))
            self.dete_list.append(dete_item)

            # Output 1x1 convolution: Normal(0, 0.02) weights, constant-0 bias
            # with an L2 decay coefficient of 0, no activation.
            conv_item = self.add_sublayer(
                'conv_{}'.format(idx),
                Conv2D(
                    num_channels=tip_channels,
                    num_filters=len(mask) * (self.num_classes + 5),
                    filter_size=1,
                    stride=1,
                    padding=0,
                    param_attr=ParamAttr(
                        initializer=fluid.initializer.Normal(0, 0.02)),
                    bias_attr=ParamAttr(
                        initializer=fluid.initializer.Constant(0),
                        regularizer=L2Decay(0)),
                    act=None))
            self.conv_list.append(conv_item)

            # Route convolution, present for every head except the last.
            if idx < last_head:
                rout_item = self.add_sublayer(
                    'rout_{}'.format(idx),
                    ConvBN(
                        num_channels=512 // divisor,
                        num_filters=256 // divisor,
                        filter_size=1,
                        stride=1,
                        padding=0))
                self.rout_list.append(rout_item)

        self.upsample = UpSample()

    def forward(self, x):
        """Return the list of output tensors, one per backbone feature map."""
        c_list = self.backbone(x)  # backbone outputs, deepest first

        p_list = []
        last_head = len(self.anchor_mask) - 1
        for idx, feature in enumerate(c_list):
            # After the first head, join the upsampled route from the previous
            # head with this feature map along the channel axis.
            if idx > 0:
                feature = fluid.layers.concat(input=[route, feature], axis=1)

            route, tip = self.dete_list[idx](feature)
            p_list.append(self.conv_list[idx](tip))

            # Prepare the route for the next head: 1x1 ConvBN, then upsample.
            if idx < last_head:
                route = self.upsample(self.rout_list[idx](route))

        return p_list
參考資料:
https://blog.csdn.net/litt1e/article/details/88814417
https://blog.csdn.net/litt1e/article/details/88852745
https://blog.csdn.net/litt1e/article/details/88907542
https://aistudio.baidu.com/aistudio/projectdetail/742781
https://aistudio.baidu.com/aistudio/projectdetail/672017
https://aistudio.baidu.com/aistudio/projectdetail/868589
https://aistudio.baidu.com/aistudio/projectdetail/122277