源碼地址:https://github.com/endernewton/tf-faster-rcnn
看到一個博客寫了對源碼的解析,寫的很簡潔全面,估計再寫也不可能比他寫的好了,不過還是簡單寫下源碼的解析及閱讀后的感受吧。https://blog.csdn.net/u012457308/article/details/79566195
代碼主要部分為輸入處理,網絡搭建及loss處理。最難的地方是各種reshape,如果不注意很容易就亂了,這個一定要理清
上一篇筆記簡單介紹了Faster-RCNN,這篇主要介紹下其tensorflow源碼閱讀筆記。下載后工程如下,主要程序都存儲在lib這個文件夾里面。接下來諸葛介紹該文件夾的內容。

1.數據讀取
Datasets文件夾里主要是數據讀取程序。這部分不做介紹,下一篇文章會介紹系列代碼如何制作數據集等工作。
2.Nets文件夾包含特征提取網絡(mobilenet,resnet_v1,vgg_16),即RPN網絡之前部分。第二部分為RPN網絡。此部分作用是對特征提取網絡進一步處理,產生porposes.第三部分為分類與預測框回歸網絡。RPN網絡結構在network.py中。網絡結構如下:

網絡搭建部分主程序:
1 def _build_network(self, is_training=True): 2 # select initializers進行初始化 3 if cfg.TRAIN.TRUNCATED: 4 initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01) 5 initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001) 6 else: 7 initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01) 8 initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001) 9 10 net_conv = self._image_to_head(is_training)##經過特征提取網絡,初步提取特征 11 with tf.variable_scope(self._scope, self._scope): 12 # build the anchors for the image 13 self._anchor_component()###產生anchor 14 # region proposal network ###產生proposal的坐標 15 rois = self._region_proposal(net_conv, is_training, initializer) 16 # region of interest pooling 17 if cfg.POOLING_MODE == 'crop': 18 pool5 = self._crop_pool_layer(net_conv, rois, "pool5") ###對產生的porposal進行ROI池化,統一格式 19 else: 20 raise NotImplementedError 21 22 fc7 = self._head_to_tail(pool5, is_training) 23 with tf.variable_scope(self._scope, self._scope): 24 # region classification 輸入到Fast-RCNN網絡中,對樣本進行分類和預測框回歸 25 cls_prob, bbox_pred = self._region_classification(fc7, is_training, 26 initializer, initializer_bbox) 27 28 self._score_summaries.update(self._predictions) 29 30 return rois, cls_prob, bbox_pred
逐步進行代碼分析。
1. 特征提取網絡,有三個備選項,論文好像選的是VGG-16,這部分就是輸入網絡,獲得某卷積層的輸出,沒有特別的地方。
2.RPN網絡,此部分是重點,也是Faster-RCNN與前面幾種方法最大的不同
anchor產生方式:每個特征點周圍產生9個anchor,分別為3種面積,3種比例。程序為generate_anchors.py。程序很好理解,就是(0,0,15,15)為基准點,有三種比例(0.5,1,2)求得基准anchor的坐標后,分別乘(8,16,32)倍的scal,就得到9種anchor的坐標,之后再平移,就得到所有特征點周圍anchor的坐標。
產生proposal。
接下來是RPN網絡部分主函數,產生目標的porposal:
1 def _region_proposal(self, net_conv, is_training, initializer): 2 rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training, weights_initializer=initializer, 3 scope="rpn_conv/3x3") ##經過一個3X3卷積,之后分兩條線 4 self._act_summaries.append(rpn) 5 rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training, 6 weights_initializer=initializer, 7 padding='VALID', activation_fn=None, scope='rpn_cls_score') ###第一條線產生預測類別確定是背景還是類別 8 # change it so that the score has 2 as its channel size 9 rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape') 10 rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape") 11 rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name="rpn_cls_pred") 12 rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob") 13 rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training, ###第二條線產生預測框坐標,對預測框坐標進行預測 14 weights_initializer=initializer, 15 padding='VALID', activation_fn=None, scope='rpn_bbox_pred') 16 if is_training: 17 rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois") ###根據預測的類別和預測框坐標對porposa進行篩選,對前N個進行NMS 18 rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor") 19 # Try to have a deterministic order for the computing graph, for reproducibility 20 with tf.control_dependencies([rpn_labels]): 21 rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois") 22 else: 23 if cfg.TEST.MODE == 'nms': 24 rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois") 25 elif cfg.TEST.MODE == 'top': 26 rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois") 27 else: 28 raise NotImplementedError 29 30 self._predictions["rpn_cls_score"] = rpn_cls_score 31 self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape 32 self._predictions["rpn_cls_prob"] = rpn_cls_prob 33 self._predictions["rpn_cls_pred"] = rpn_cls_pred 34 self._predictions["rpn_bbox_pred"] = rpn_bbox_pred 35 self._predictions["rois"] = rois 36 37 return rois
產生目標porposals的坐標后,因為size大小不一樣,需要進行ROI池化,使輸出為統一維度。程序如下:
1 def _crop_pool_layer(self, bottom, rois, name): ####bottom為convert層卷積輸出, feat_stride為補償乘積,用來求得原圖的w,h.rois為選出的256個anchor的坐標 2 with tf.variable_scope(name) as scope: 3 batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1]) 4 # Get the normalized coordinates of bounding boxes 5 bottom_shape = tf.shape(bottom) 6 height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0]) 7 width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0]) 8 x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width 9 y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height 10 x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width 11 y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height###得到相對位置 12 # Won't be back-propagated to rois anyway, but to save time 13 bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1)) 14 pre_pool_size = cfg.POOLING_SIZE * 2 15 crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")##利用tensorflow的自帶函數作用類似於ROI池化 16 17 return slim.max_pool2d(crops, [2, 2], padding='SAME')
之后就是Fast-RCNN部分的網絡結構了,即根據產生的porposal,進行分類及預測。網絡部分代碼:
1 def _region_classification(self, fc7, is_training, initializer, initializer_bbox): 2 cls_score = slim.fully_connected(fc7, self._num_classes, 3 weights_initializer=initializer, 4 trainable=is_training, 5 activation_fn=None, scope='cls_score') 6 cls_prob = self._softmax_layer(cls_score, "cls_prob") 7 cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred") 8 bbox_pred = slim.fully_connected(fc7, self._num_classes * 4, 9 weights_initializer=initializer_bbox, 10 trainable=is_training, 11 activation_fn=None, scope='bbox_pred') 12 13 self._predictions["cls_score"] = cls_score 14 self._predictions["cls_pred"] = cls_pred 15 self._predictions["cls_prob"] = cls_prob 16 self._predictions["bbox_pred"] = bbox_pred 17 18 return cls_prob, bbox_pred
最后說一下loss函數:分為RPN部分loss和RCNN部分loss。整體網絡訓練是聯合訓練的。
1 def _add_losses(self, sigma_rpn=3.0): 2 with tf.variable_scope('LOSS_' + self._tag) as scope: 3 # RPN, class loss 4 rpn_cls_score = tf.reshape(self._predictions['rpn_cls_score_reshape'], [-1, 2]) 5 rpn_label = tf.reshape(self._anchor_targets['rpn_labels'], [-1]) 6 rpn_select = tf.where(tf.not_equal(rpn_label, -1)) 7 rpn_cls_score = tf.reshape(tf.gather(rpn_cls_score, rpn_select), [-1, 2]) 8 rpn_label = tf.reshape(tf.gather(rpn_label, rpn_select), [-1]) 9 rpn_cross_entropy = tf.reduce_mean( 10 tf.nn.sparse_softmax_cross_entropy_with_logits(logits=rpn_cls_score, labels=rpn_label)) 11 12 # RPN, bbox loss 13 rpn_bbox_pred = self._predictions['rpn_bbox_pred'] 14 rpn_bbox_targets = self._anchor_targets['rpn_bbox_targets'] 15 rpn_bbox_inside_weights = self._anchor_targets['rpn_bbox_inside_weights'] 16 rpn_bbox_outside_weights = self._anchor_targets['rpn_bbox_outside_weights'] 17 rpn_loss_box = self._smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights, 18 rpn_bbox_outside_weights, sigma=sigma_rpn, dim=[1, 2, 3]) 19 20 # RCNN, class loss 21 cls_score = self._predictions["cls_score"] 22 label = tf.reshape(self._proposal_targets["labels"], [-1]) 23 cross_entropy = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=cls_score, labels=label)) 24 25 # RCNN, bbox loss 26 bbox_pred = self._predictions['bbox_pred'] 27 bbox_targets = self._proposal_targets['bbox_targets'] 28 bbox_inside_weights = self._proposal_targets['bbox_inside_weights'] 29 bbox_outside_weights = self._proposal_targets['bbox_outside_weights'] 30 loss_box = self._smooth_l1_loss(bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights) 31 32 self._losses['cross_entropy'] = cross_entropy 33 self._losses['loss_box'] = loss_box 34 self._losses['rpn_cross_entropy'] = rpn_cross_entropy 35 self._losses['rpn_loss_box'] = rpn_loss_box 36 37 loss = cross_entropy + loss_box + rpn_cross_entropy + rpn_loss_box 38 regularization_loss = tf.add_n(tf.losses.get_regularization_losses(), 'regu') 39 self._losses['total_loss'] = loss + regularization_loss 40 41 self._event_summaries.update(self._losses) 42 43 return loss
