Learning the Faster R-CNN Code: RPN (Part 6)


 

Code file structure

  • bbox_transform.py  # bounding-box transforms.
  • generate_anchors.py  # generates the anchors from several scales and aspect ratios.
  • proposal_layer.py  # outputs object-detection proposals by applying the estimated bounding-box transformations to a set of regular boxes (called "anchors"); selects the suitable RoIs.
  • anchor_target_layer.py  # matches anchors to ground truth, producing anchor classification labels and bounding-box regression targets; i.e., it finds, for each anchor, the ground-truth class and coordinate-transform information needed for training.
  • proposal_target_layer_cascade.py  # assigns object-detection proposals to ground-truth targets, producing proposal classification labels and bounding-box regression targets; i.e., it finds, for the selected rois, the ground-truth class and coordinate-transform information needed for training.
  • rpn.py  # the RPN network definition.

References

A detailed Faster R-CNN source-code analysis of the RPN: https://blog.csdn.net/jiongnima/article/details/79781792

Faster R-CNN source-code reading notes: https://www.jianshu.com/p/a223853f8402?tdsourcetag=s_pcqq_aiomsg (annotates the RPN part of the code).

1 rpn.py

    # imports used by this file (paths follow the faster-rcnn.pytorch layout
    # this post walks through; adjust them to your own project)
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from torch.autograd import Variable

    from model.utils.config import cfg
    from .proposal_layer import _ProposalLayer
    from .anchor_target_layer import _AnchorTargetLayer
    from model.utils.net_utils import _smooth_l1_loss


    class _RPN(nn.Module):
        """ region proposal network """
        def __init__(self, din):
            super(_RPN, self).__init__()

            # depth of the input feature map, e.g., 512
            self.din = din
            # anchor scales: __C.ANCHOR_SCALES = [8, 16, 32]
            self.anchor_scales = cfg.ANCHOR_SCALES
            # anchor aspect ratios: __C.ANCHOR_RATIOS = [0.5, 1, 2]
            self.anchor_ratios = cfg.ANCHOR_RATIOS
            # feature stride: __C.FEAT_STRIDE = [16, ]
            self.feat_stride = cfg.FEAT_STRIDE[0]

            # define the conv-relu layer that processes the input feature map
            # nn.Conv2d(in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True)
            self.RPN_Conv = nn.Conv2d(self.din, 512, 3, 1, 1, bias=True)

            # define the bg/fg classification score layer;
            # every anchor gets a background and a foreground score,
            # so the channel count is scales x ratios x 2
            self.nc_score_out = len(self.anchor_scales) * len(self.anchor_ratios) * 2  # 2(bg/fg) * 9 (anchors)
            # 1x1 conv on top of the RPN conv above: 512 channels in, nc_score_out channels out
            self.RPN_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

            # define the anchor box offset prediction layer;
            # the offset output count is the number of anchors x 4
            self.nc_bbox_out = len(self.anchor_scales) * len(self.anchor_ratios) * 4  # 4(coords) * 9 (anchors)
            # 1x1 conv: 512 channels in, nc_bbox_out channels out
            self.RPN_bbox_pred = nn.Conv2d(512, self.nc_bbox_out, 1, 1, 0)

            # define the proposal layer (_ProposalLayer);
            # arguments: feature stride, scales, ratios
            self.RPN_proposal = _ProposalLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)

            # define the anchor target layer (_AnchorTargetLayer);
            # arguments: feature stride, scales, ratios
            self.RPN_anchor_target = _AnchorTargetLayer(self.feat_stride, self.anchor_scales, self.anchor_ratios)

            self.rpn_loss_cls = 0  # classification loss
            self.rpn_loss_box = 0  # regression loss

        @staticmethod
        def reshape(x, d):
            # reshape x so that its second dimension becomes d,
            # folding the surplus into the third (height) dimension
            input_shape = x.size()
            x = x.view(
                input_shape[0],
                int(d),
                int(float(input_shape[1] * input_shape[2]) / float(d)),
                input_shape[3]
            )
            return x

        def forward(self, base_feat, im_info, gt_boxes, num_boxes):

            # base_feat holds batch_size, num_channels, data_height, data_width
            batch_size = base_feat.size(0)  # first dimension of the feature map

            # feature map after the conv-relu layer
            rpn_conv1 = F.relu(self.RPN_Conv(base_feat), inplace=True)
            # get the rpn classification scores
            rpn_cls_score = self.RPN_cls_score(rpn_conv1)

            # reshape rpn_cls_score to (B, 2, 9*H, W) so softmax runs over the bg/fg pair
            rpn_cls_score_reshape = self.reshape(rpn_cls_score, 2)
            # softmax turns the scores into probabilities
            rpn_cls_prob_reshape = F.softmax(rpn_cls_score_reshape, 1)
            # reshape back to (B, 18, H, W): bg/fg probabilities per anchor
            rpn_cls_prob = self.reshape(rpn_cls_prob_reshape, self.nc_score_out)

            # get the rpn offsets to the anchor boxes (4 values per anchor)
            rpn_bbox_pred = self.RPN_bbox_pred(rpn_conv1)

            # proposal layer
            cfg_key = 'TRAIN' if self.training else 'TEST'

            # extract proposal regions from the anchors;
            # inputs: classification probabilities, 4-value offsets, image info
            rois = self.RPN_proposal((rpn_cls_prob.data, rpn_bbox_pred.data,
                                     im_info, cfg_key))

            self.rpn_loss_cls = 0  # classification loss
            self.rpn_loss_box = 0  # regression loss

            # generate training labels and build the rpn losses
            if self.training:
                assert gt_boxes is not None

                # anchor targets
                rpn_data = self.RPN_anchor_target((rpn_cls_score.data, gt_boxes, im_info, num_boxes))

                # compute the classification loss.
                # permute reorders the dimensions; view only works on contiguous
                # tensors, so after transpose/permute we need contiguous() to get
                # a contiguous copy.
                # rpn_cls_score: the bg/fg scores the rpn predicts for each anchor
                rpn_cls_score = rpn_cls_score_reshape.permute(0, 2, 3, 1).contiguous().view(batch_size, -1, 2)
                # rpn_label: the ground-truth bg/fg label for each anchor
                rpn_label = rpn_data[0].view(batch_size, -1)

                # keep only anchors whose label is not -1 (ignored)
                rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))
                rpn_cls_score = torch.index_select(rpn_cls_score.view(-1, 2), 0, rpn_keep)
                rpn_label = torch.index_select(rpn_label.view(-1), 0, rpn_keep.data)
                rpn_label = Variable(rpn_label.long())
                self.rpn_loss_cls = F.cross_entropy(rpn_cls_score, rpn_label)
                fg_cnt = torch.sum(rpn_label.data.ne(0))

                rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = rpn_data[1:]

                # compute the bbox regression loss

                # used when computing the box regression loss during training;
                # only anchors that stay inside the image boundary count
                rpn_bbox_inside_weights = Variable(rpn_bbox_inside_weights)
                rpn_bbox_outside_weights = Variable(rpn_bbox_outside_weights)
                # the ground-truth offset values (4 per anchor)
                rpn_bbox_targets = Variable(rpn_bbox_targets)

                # compute the rpn box regression loss; note that the inside
                # and outside weights are used here
                self.rpn_loss_box = _smooth_l1_loss(rpn_bbox_pred, rpn_bbox_targets, rpn_bbox_inside_weights,
                                                                rpn_bbox_outside_weights, sigma=3, dim=[1, 2, 3])

            return rois, self.rpn_loss_cls, self.rpn_loss_box
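
To see what the `reshape` helper is doing, here is a minimal, self-contained sketch (the batch size and feature-map dimensions are my own toy values, not from the post): the 18-channel score map (2 classes x 9 anchors) is folded so that exactly the bg/fg pair sits on dimension 1, softmaxed, then unfolded again.

    import torch
    import torch.nn.functional as F

    def reshape(x, d):
        # same trick as _RPN.reshape: fix dim 1 to d, fold the surplus into dim 2
        n, c, h, w = x.size()
        return x.view(n, d, (c * h) // d, w)

    rpn_cls_score = torch.randn(1, 18, 38, 50)       # (B, 2*9, H, W), toy sizes
    score_reshape = reshape(rpn_cls_score, 2)        # (B, 2, 9*H, W)
    prob_reshape = F.softmax(score_reshape, dim=1)   # softmax over the bg/fg pair
    rpn_cls_prob = reshape(prob_reshape, 18)         # back to (B, 18, H, W)
    print(rpn_cls_prob.shape)                        # torch.Size([1, 18, 38, 50])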

2 generate_anchors.py

This part is straightforward: it enumerates the anchors for every scale and aspect ratio (three of each here) and stacks them all into a single `anchors` array.
Detailed annotations below:

    import numpy as np

    # Verify that we compute the same anchors as Shaoqing's matlab implementation:
    #
    #    >> load output/rpn_cachedir/faster_rcnn_VOC2007_ZF_stage1_rpn/anchors.mat
    #    >> anchors
    #
    #    anchors =     % the 9 anchors
    #
    #       -83   -39   100    56
    #      -175   -87   192   104
    #      -359  -183   376   200
    #       -55   -55    72    72
    #      -119  -119   136   136
    #      -247  -247   264   264
    #       -35   -79    52    96
    #       -79  -167    96   184
    #      -167  -343   184   360

    #array([[ -83.,  -39.,  100.,   56.],
    #       [-175.,  -87.,  192.,  104.],
    #       [-359., -183.,  376.,  200.],
    #       [ -55.,  -55.,   72.,   72.],
    #       [-119., -119.,  136.,  136.],
    #       [-247., -247.,  264.,  264.],
    #       [ -35.,  -79.,   52.,   96.],
    #       [ -79., -167.,   96.,  184.],
    #       [-167., -343.,  184.,  360.]])

    try:
        xrange          # Python 2
    except NameError:
        xrange = range  # Python 3


    def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
                         scales=2**np.arange(3, 6)):  # np.arange(3, 6) yields 3, 4, 5, so the scales are 8, 16, 32
        """
        Generate anchor (reference) windows by enumerating aspect ratios X
        scales wrt a reference (0, 0, 15, 15) window.
        """

        base_anchor = np.array([1, 1, base_size, base_size]) - 1
        ratio_anchors = _ratio_enum(base_anchor, ratios)
        # np.vstack takes a tuple, list, or numpy array and stacks the
        # rows into a single numpy array
        anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
                             for i in xrange(ratio_anchors.shape[0])])
        return anchors

    # return the width, height, and center coordinates of an anchor
    def _whctrs(anchor):
        """
        Return width, height, x center, and y center for an anchor (window).
        """

        w = anchor[2] - anchor[0] + 1
        h = anchor[3] - anchor[1] + 1
        x_ctr = anchor[0] + 0.5 * (w - 1)
        y_ctr = anchor[1] + 0.5 * (h - 1)
        return w, h, x_ctr, y_ctr

    # assemble the given widths/heights around a center into anchors, one per row
    def _mkanchors(ws, hs, x_ctr, y_ctr):
        """
        Given a vector of widths (ws) and heights (hs) around a center
        (x_ctr, y_ctr), output a set of anchors (windows).
        """

        ws = ws[:, np.newaxis]  # np.newaxis is an alias for None; indexing with it inserts a new axis
        hs = hs[:, np.newaxis]
        anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
                             y_ctr - 0.5 * (hs - 1),
                             x_ctr + 0.5 * (ws - 1),
                             y_ctr + 0.5 * (hs - 1)))
        return anchors

    # one anchor per aspect ratio
    def _ratio_enum(anchor, ratios):
        """
        Enumerate a set of anchors for each aspect ratio wrt an anchor.
        """

        w, h, x_ctr, y_ctr = _whctrs(anchor)  # width, height, and center of the anchor (function defined above)
        size = w * h
        size_ratios = size / ratios  # ws**2 for each ratio (ratio = h/w), so that ws*hs stays close to size
        ws = np.round(np.sqrt(size_ratios))
        hs = np.round(ws * ratios)
        anchors = _mkanchors(ws, hs, x_ctr, y_ctr)  # keep the anchors for these ratios
        return anchors

    # one anchor per scale
    def _scale_enum(anchor, scales):
        """
        Enumerate a set of anchors for each scale wrt an anchor.
        """

        w, h, x_ctr, y_ctr = _whctrs(anchor)
        ws = w * scales
        hs = h * scales
        anchors = _mkanchors(ws, hs, x_ctr, y_ctr)  # keep the anchors for these scales
        return anchors

    if __name__ == '__main__':
        import time
        t = time.time()
        a = generate_anchors()  # the resulting anchors
        print(time.time() - t)
        print(a)
        from IPython import embed; embed()
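
As a sanity check, here is a hand trace of one anchor (ratio 0.5, scale 8) through `_ratio_enum` and `_scale_enum`. Note that the Python output equals the MATLAB table above minus 1, because MATLAB coordinates are 1-based:

    import numpy as np

    # _whctrs of the base window [0, 0, 15, 15]
    w, h, x_ctr, y_ctr = 16, 16, 7.5, 7.5
    # _ratio_enum for ratio 0.5: keep the area, change the aspect
    ws = np.round(np.sqrt(w * h / 0.5))   # 23.0
    hs = np.round(ws * 0.5)               # 12.0 (11.5 rounds to even)
    # -> ratio anchor [-3.5, 2.0, 18.5, 13.0]
    # _scale_enum for scale 8: width 23*8=184, height 12*8=96 around (7.5, 7.5)
    x1 = x_ctr - 0.5 * (184 - 1)          # -84.0
    y1 = y_ctr - 0.5 * (96 - 1)           # -40.0
    x2 = x_ctr + 0.5 * (184 - 1)          #  99.0
    y2 = y_ctr + 0.5 * (96 - 1)           #  55.0
    print([x1, y1, x2, y2])               # matches generate_anchors()[0]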

3 proposal_layer.py

Derives the proposal regions from the anchors. NMS appears here; it will be covered later. Detailed annotations below:

    # imports as in the repository's proposal_layer.py (paths may differ in your layout)
    import torch
    import torch.nn as nn
    import numpy as np
    from model.utils.config import cfg
    from .generate_anchors import generate_anchors
    from .bbox_transform import bbox_transform_inv, clip_boxes, clip_boxes_batch
    from model.nms.nms_wrapper import nms


    class _ProposalLayer(nn.Module):
        """
        Outputs object detection proposals by applying estimated bounding-box
        transformations to a set of regular boxes (called "anchors").
        """
        # arguments: feature stride, scales, ratios
        def __init__(self, feat_stride, scales, ratios):
            super(_ProposalLayer, self).__init__()
            # store the feature stride
            self._feat_stride = feat_stride
            # generate all the base anchors
            self._anchors = torch.from_numpy(generate_anchors(scales=np.array(scales),
                ratios=np.array(ratios))).float()
            # the number of rows is the number of base anchors
            self._num_anchors = self._anchors.size(0)

            # rois blob: holds R regions of interest, each is a 5-tuple
            # (n, x1, y1, x2, y2) specifying an image batch index n and a
            # rectangle (x1, y1, x2, y2)
            # top[0].reshape(1, 5)
            #
            # # scores blob: holds scores for R regions of interest
            # if len(top) > 1:
            #     top[1].reshape(1, 1, 1, 1)

        def forward(self, input):

            # Algorithm:
            #
            # for each (H, W) location i
            #   generate A anchor boxes centered on cell i
            #   apply predicted bbox deltas at cell i to each of the A anchors
            # clip predicted boxes to image
            # remove predicted boxes with either height or width < threshold
            # sort all (proposal, score) pairs by score from highest to lowest
            # take top pre_nms_topN proposals before NMS
            # apply NMS with threshold 0.7 to remaining proposals
            # take after_nms_topN proposals after NMS
            # return the top proposals (-> RoIs top, scores top)

            # the first set of _num_anchors channels are the bg probs,
            # the second set are the fg probs
            scores = input[0][:, self._num_anchors:, :, :]  # foreground probabilities
            bbox_deltas = input[1]  # predicted offsets
            im_info = input[2]      # image info
            cfg_key = input[3]      # 'TRAIN' or 'TEST'

            # read the relevant config values
            pre_nms_topN  = cfg[cfg_key].RPN_PRE_NMS_TOP_N
            post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
            nms_thresh    = cfg[cfg_key].RPN_NMS_THRESH
            min_size      = cfg[cfg_key].RPN_MIN_SIZE

            # batch size
            batch_size = bbox_deltas.size(0)

            # below: generate the anchors on the original image
            feat_height, feat_width = scores.size(2), scores.size(3)
            shift_x = np.arange(0, feat_width) * self._feat_stride   # shape [width]; x offsets of the feature-map cells on the original image
            shift_y = np.arange(0, feat_height) * self._feat_stride  # shape [height]
            shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # build a grid; both now have shape [height, width]
            shifts = torch.from_numpy(np.vstack((shift_x.ravel(), shift_y.ravel(),
                                      shift_x.ravel(), shift_y.ravel())).transpose())  # shape [height*width, 4]
            shifts = shifts.contiguous().type_as(scores).float()

            A = self._num_anchors
            K = shifts.size(0)

            self._anchors = self._anchors.type_as(scores)
            # anchors = self._anchors.view(1, A, 4) + shifts.view(1, K, 4).permute(1, 0, 2).contiguous()
            anchors = self._anchors.view(1, A, 4) + shifts.view(K, 1, 4)
            anchors = anchors.view(1, K * A, 4).expand(batch_size, K * A, 4)

            # Transpose and reshape predicted bbox transformations to get them
            # into the same order as the anchors:
            bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous()
            bbox_deltas = bbox_deltas.view(batch_size, -1, 4)

            # Same story for the scores:
            scores = scores.permute(0, 2, 3, 1).contiguous()
            scores = scores.view(batch_size, -1)

            # Convert anchors into proposals via bbox transformations
            proposals = bbox_transform_inv(anchors, bbox_deltas, batch_size)

            # 2. clip predicted boxes to image:
            # force all four proposal corners to lie inside the image boundary
            proposals = clip_boxes(proposals, im_info, batch_size)
            # proposals = clip_boxes_batch(proposals, im_info, batch_size)

            # assign the score to 0 if it's not kept.
            # keep = self._filter_boxes(proposals, min_size * im_info[:, 2])

            # trim keep index to make it equal over batch
            # keep_idx = torch.cat(tuple(keep_idx), 0)

            # scores_keep = scores.view(-1)[keep_idx].view(batch_size, trim_size)
            # proposals_keep = proposals.view(-1, 4)[keep_idx, :].contiguous().view(batch_size, trim_size, 4)

            # _, order = torch.sort(scores_keep, 1, True)

            scores_keep = scores
            proposals_keep = proposals
            _, order = torch.sort(scores_keep, 1, True)

            output = scores.new(batch_size, post_nms_topN, 5).zero_()
            for i in range(batch_size):
                # # 3. remove predicted boxes with either height or width < threshold
                # # (NOTE: convert min_size to input image scale stored in im_info[2])
                proposals_single = proposals_keep[i]
                scores_single = scores_keep[i]

                # # 4. sort all (proposal, score) pairs by score from highest to lowest
                # # 5. take top pre_nms_topN (e.g. 6000)
                order_single = order[i]

                if pre_nms_topN > 0 and pre_nms_topN < scores_keep.numel():
                    order_single = order_single[:pre_nms_topN]

                proposals_single = proposals_single[order_single, :]
                scores_single = scores_single[order_single].view(-1, 1)

                # 6. apply nms (e.g. threshold = 0.7)
                # 7. take after_nms_topN (e.g. 300)
                # 8. return the top proposals (-> RoIs top)

                keep_idx_i = nms(torch.cat((proposals_single, scores_single), 1), nms_thresh, force_cpu=not cfg.USE_GPU_NMS)
                keep_idx_i = keep_idx_i.long().view(-1)

                if post_nms_topN > 0:
                    keep_idx_i = keep_idx_i[:post_nms_topN]
                proposals_single = proposals_single[keep_idx_i, :]
                scores_single = scores_single[keep_idx_i, :]

                # pad with zeros at the end
                num_proposal = proposals_single.size(0)
                output[i, :, 0] = i
                output[i, :num_proposal, 1:] = proposals_single

            return output

        def backward(self, top, propagate_down, bottom):
            """This layer does not propagate gradients."""
            pass

        def reshape(self, bottom, top):
            """Reshaping happens during the call to forward."""
            pass

        # remove any box with a side smaller than min_size
        def _filter_boxes(self, boxes, min_size):
            """Remove all boxes with any side smaller than min_size."""
            ws = boxes[:, :, 2] - boxes[:, :, 0] + 1
            hs = boxes[:, :, 3] - boxes[:, :, 1] + 1
            # expand_as(ws) broadcasts the tensor to the size of ws
            keep = ((ws >= min_size.view(-1, 1).expand_as(ws)) & (hs >= min_size.view(-1, 1).expand_as(hs)))
            return keep
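
The densest step in `forward` is the broadcast that tiles the A base anchors over all K feature-map cells. Here is a minimal sketch with toy sizes (a 2x3 feature map, stride 16, and two base anchors instead of nine; my own example, not from the post):

    import numpy as np
    import torch

    feat_height, feat_width, feat_stride = 2, 3, 16
    shift_x = np.arange(0, feat_width) * feat_stride        # [0, 16, 32]
    shift_y = np.arange(0, feat_height) * feat_stride       # [0, 16]
    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
    shifts = torch.from_numpy(
        np.vstack((shift_x.ravel(), shift_y.ravel(),
                   shift_x.ravel(), shift_y.ravel())).transpose()).float()  # (K, 4), K=6

    base = torch.tensor([[-3.5, 2.0, 18.5, 13.0],
                         [ 0.0, 0.0, 15.0, 15.0]])          # (A, 4), A=2
    A, K = base.size(0), shifts.size(0)
    anchors = (base.view(1, A, 4) + shifts.view(K, 1, 4)).view(K * A, 4)
    print(anchors.shape)   # torch.Size([12, 4]): every base anchor at every cell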

4 bbox_transform.py

This file implements the box coordinate transforms. Annotations below:

    import torch

    # bbox_transform computes the coordinate regression targets for the anchors.
    # Note that the boxes are first converted to center coordinates plus width/height.
    def bbox_transform(ex_rois, gt_rois):
        ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
        ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
        ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
        ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights  # center coordinates and width/height of each anchor

        gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
        gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
        gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
        gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights  # center coordinates and width/height of each anchor's ground-truth box

        targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths  # the four coordinate-transform values
        targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
        targets_dw = torch.log(gt_widths / ex_widths)
        targets_dh = torch.log(gt_heights / ex_heights)

        targets = torch.stack(
            (targets_dx, targets_dy, targets_dw, targets_dh), 1)  # four values per anchor; shape [num_anchors, 4]

        return targets

    def bbox_transform_batch(ex_rois, gt_rois):

        if ex_rois.dim() == 2:
            ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
            ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
            ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
            ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights

            gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0
            gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0
            gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths
            gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights

            targets_dx = (gt_ctr_x - ex_ctr_x.view(1,-1).expand_as(gt_ctr_x)) / ex_widths
            targets_dy = (gt_ctr_y - ex_ctr_y.view(1,-1).expand_as(gt_ctr_y)) / ex_heights
            targets_dw = torch.log(gt_widths / ex_widths.view(1,-1).expand_as(gt_widths))
            targets_dh = torch.log(gt_heights / ex_heights.view(1,-1).expand_as(gt_heights))

        elif ex_rois.dim() == 3:
            ex_widths = ex_rois[:, :, 2] - ex_rois[:, :, 0] + 1.0
            ex_heights = ex_rois[:,:, 3] - ex_rois[:,:, 1] + 1.0
            ex_ctr_x = ex_rois[:, :, 0] + 0.5 * ex_widths
            ex_ctr_y = ex_rois[:, :, 1] + 0.5 * ex_heights

            gt_widths = gt_rois[:, :, 2] - gt_rois[:, :, 0] + 1.0
            gt_heights = gt_rois[:, :, 3] - gt_rois[:, :, 1] + 1.0
            gt_ctr_x = gt_rois[:, :, 0] + 0.5 * gt_widths
            gt_ctr_y = gt_rois[:, :, 1] + 0.5 * gt_heights

            targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
            targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
            targets_dw = torch.log(gt_widths / ex_widths)
            targets_dh = torch.log(gt_heights / ex_heights)
        else:
            raise ValueError('ex_roi input dimension is not correct.')

        targets = torch.stack(
            (targets_dx, targets_dy, targets_dw, targets_dh),2)

        return targets

    # bbox_transform_inv applies the RPN's predicted transforms to all initial boxes
    def bbox_transform_inv(boxes, deltas, batch_size):

        # center coordinates and width/height of the initial proposals
        widths = boxes[:, :, 2] - boxes[:, :, 0] + 1.0
        heights = boxes[:, :, 3] - boxes[:, :, 1] + 1.0
        ctr_x = boxes[:, :, 0] + 0.5 * widths
        ctr_y = boxes[:, :, 1] + 0.5 * heights

        # the predicted coordinate transforms
        dx = deltas[:, :, 0::4]
        dy = deltas[:, :, 1::4]
        dw = deltas[:, :, 2::4]
        dh = deltas[:, :, 3::4]

        # center coordinates and width/height of the transformed proposals
        pred_ctr_x = dx * widths.unsqueeze(2) + ctr_x.unsqueeze(2)
        pred_ctr_y = dy * heights.unsqueeze(2) + ctr_y.unsqueeze(2)
        pred_w = torch.exp(dw) * widths.unsqueeze(2)
        pred_h = torch.exp(dh) * heights.unsqueeze(2)

        # convert the transformed proposals back to the corner (x1, y1, x2, y2) form
        pred_boxes = deltas.clone()
        # x1
        pred_boxes[:, :, 0::4] = pred_ctr_x - 0.5 * pred_w
        # y1
        pred_boxes[:, :, 1::4] = pred_ctr_y - 0.5 * pred_h
        # x2
        pred_boxes[:, :, 2::4] = pred_ctr_x + 0.5 * pred_w
        # y2
        pred_boxes[:, :, 3::4] = pred_ctr_y + 0.5 * pred_h

        return pred_boxes

    def clip_boxes_batch(boxes, im_shape, batch_size):
        """
        Clip boxes to image boundaries.
        """
        num_rois = boxes.size(1)

        boxes[boxes < 0] = 0
        # batch_x = (im_shape[:,0]-1).view(batch_size, 1).expand(batch_size, num_rois)
        # batch_y = (im_shape[:,1]-1).view(batch_size, 1).expand(batch_size, num_rois)

        batch_x = im_shape[:, 1] - 1
        batch_y = im_shape[:, 0] - 1

        boxes[:,:,0][boxes[:,:,0] > batch_x] = batch_x
        boxes[:,:,1][boxes[:,:,1] > batch_y] = batch_y
        boxes[:,:,2][boxes[:,:,2] > batch_x] = batch_x
        boxes[:,:,3][boxes[:,:,3] > batch_y] = batch_y

        return boxes

    # strictly clamp all four proposal corners to the image boundary
    def clip_boxes(boxes, im_shape, batch_size):

        for i in range(batch_size):
            boxes[i,:,0::4].clamp_(0, im_shape[i, 1]-1)
            boxes[i,:,1::4].clamp_(0, im_shape[i, 0]-1)
            boxes[i,:,2::4].clamp_(0, im_shape[i, 1]-1)
            boxes[i,:,3::4].clamp_(0, im_shape[i, 0]-1)

        return boxes


    # compute the IoU: the intersection area of two boxes / their union area
    def bbox_overlaps(anchors, gt_boxes):
        """
        anchors: (N, 4) ndarray of float
        gt_boxes: (K, 4) ndarray of float

        overlaps: (N, K) ndarray of overlap between boxes and query_boxes
        """
        N = anchors.size(0)
        K = gt_boxes.size(0)

        gt_boxes_area = ((gt_boxes[:,2] - gt_boxes[:,0] + 1) *
                    (gt_boxes[:,3] - gt_boxes[:,1] + 1)).view(1, K)

        anchors_area = ((anchors[:,2] - anchors[:,0] + 1) *
                    (anchors[:,3] - anchors[:,1] + 1)).view(N, 1)

        boxes = anchors.view(N, 1, 4).expand(N, K, 4)
        query_boxes = gt_boxes.view(1, K, 4).expand(N, K, 4)

        iw = (torch.min(boxes[:,:,2], query_boxes[:,:,2]) -
            torch.max(boxes[:,:,0], query_boxes[:,:,0]) + 1)
        iw[iw < 0] = 0

        ih = (torch.min(boxes[:,:,3], query_boxes[:,:,3]) -
            torch.max(boxes[:,:,1], query_boxes[:,:,1]) + 1)
        ih[ih < 0] = 0

        ua = anchors_area + gt_boxes_area - (iw * ih)
        overlaps = iw * ih / ua

        return overlaps

    def bbox_overlaps_batch(anchors, gt_boxes):
        """
        anchors: (N, 4) ndarray of float
        gt_boxes: (b, K, 5) ndarray of float

        overlaps: (N, K) ndarray of overlap between boxes and query_boxes
        """
        batch_size = gt_boxes.size(0)


        if anchors.dim() == 2:

            N = anchors.size(0)
            K = gt_boxes.size(1)

            anchors = anchors.view(1, N, 4).expand(batch_size, N, 4).contiguous()
            gt_boxes = gt_boxes[:,:,:4].contiguous()


            gt_boxes_x = (gt_boxes[:,:,2] - gt_boxes[:,:,0] + 1)
            gt_boxes_y = (gt_boxes[:,:,3] - gt_boxes[:,:,1] + 1)
            gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K)

            anchors_boxes_x = (anchors[:,:,2] - anchors[:,:,0] + 1)
            anchors_boxes_y = (anchors[:,:,3] - anchors[:,:,1] + 1)
            anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1)

            gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1)
            anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1)

            boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4)
            query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(batch_size, N, K, 4)

            iw = (torch.min(boxes[:,:,:,2], query_boxes[:,:,:,2]) -
                torch.max(boxes[:,:,:,0], query_boxes[:,:,:,0]) + 1)
            iw[iw < 0] = 0

            ih = (torch.min(boxes[:,:,:,3], query_boxes[:,:,:,3]) -
                torch.max(boxes[:,:,:,1], query_boxes[:,:,:,1]) + 1)
            ih[ih < 0] = 0
            ua = anchors_area + gt_boxes_area - (iw * ih)
            overlaps = iw * ih / ua

            # mask the overlap here.
            overlaps.masked_fill_(gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0)
            overlaps.masked_fill_(anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1)

        elif anchors.dim() == 3:
            N = anchors.size(1)
            K = gt_boxes.size(1)

            if anchors.size(2) == 4:
                anchors = anchors[:,:,:4].contiguous()
            else:
                anchors = anchors[:,:,1:5].contiguous()

            gt_boxes = gt_boxes[:,:,:4].contiguous()

            gt_boxes_x = (gt_boxes[:,:,2] - gt_boxes[:,:,0] + 1)
            gt_boxes_y = (gt_boxes[:,:,3] - gt_boxes[:,:,1] + 1)
            gt_boxes_area = (gt_boxes_x * gt_boxes_y).view(batch_size, 1, K)

            anchors_boxes_x = (anchors[:,:,2] - anchors[:,:,0] + 1)
            anchors_boxes_y = (anchors[:,:,3] - anchors[:,:,1] + 1)
            anchors_area = (anchors_boxes_x * anchors_boxes_y).view(batch_size, N, 1)

            gt_area_zero = (gt_boxes_x == 1) & (gt_boxes_y == 1)
            anchors_area_zero = (anchors_boxes_x == 1) & (anchors_boxes_y == 1)

            boxes = anchors.view(batch_size, N, 1, 4).expand(batch_size, N, K, 4)
            query_boxes = gt_boxes.view(batch_size, 1, K, 4).expand(batch_size, N, K, 4)

            iw = (torch.min(boxes[:,:,:,2], query_boxes[:,:,:,2]) -
                torch.max(boxes[:,:,:,0], query_boxes[:,:,:,0]) + 1)
            iw[iw < 0] = 0

            ih = (torch.min(boxes[:,:,:,3], query_boxes[:,:,:,3]) -
                torch.max(boxes[:,:,:,1], query_boxes[:,:,:,1]) + 1)
            ih[ih < 0] = 0
            ua = anchors_area + gt_boxes_area - (iw * ih)

            overlaps = iw * ih / ua

            # mask the overlap here.
            overlaps.masked_fill_(gt_area_zero.view(batch_size, 1, K).expand(batch_size, N, K), 0)
            overlaps.masked_fill_(anchors_area_zero.view(batch_size, N, 1).expand(batch_size, N, K), -1)
        else:
            raise ValueError('anchors input dimension is not correct.')

        return overlaps
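
A small worked example of the two key functions, assuming `bbox_transform` and `bbox_overlaps` from above are in scope (the boxes are my own toy values):

    import torch

    ex = torch.tensor([[0., 0., 15., 15.]])   # anchor: w=16, h=16, ctr=(7.5, 7.5)
    gt = torch.tensor([[2., 4., 20., 28.]])   # gt box: w=19, h=25, ctr=(11.5, 16.5)

    t = bbox_transform(ex, gt)
    # dx=4/16, dy=9/16, dw=ln(19/16), dh=ln(25/16)
    print(t)      # tensor([[0.2500, 0.5625, 0.1719, 0.4463]])

    iou = bbox_overlaps(ex, gt)
    # intersection 14*12=168; union 256+475-168=563
    print(iou)    # tensor([[0.2984]])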

 

5 anchor_target_layer.py

Finds, for each anchor, the ground-truth class and coordinate-transform information needed for training.
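
Until that part of the walkthrough is written, here is a minimal sketch (my own, not the repo's code) of the labeling rule _AnchorTargetLayer implements, using the 0.7/0.3 IoU thresholds from the Faster R-CNN paper: 1 = foreground, 0 = background, -1 = ignored.

    import torch

    def label_anchors(overlaps, pos_thresh=0.7, neg_thresh=0.3):
        """overlaps: (N, K) IoU matrix between N anchors and K gt boxes."""
        max_iou, _ = overlaps.max(dim=1)            # best gt for each anchor
        labels = torch.full((overlaps.size(0),), -1, dtype=torch.long)
        labels[max_iou < neg_thresh] = 0            # clear background
        labels[max_iou >= pos_thresh] = 1           # confident foreground
        labels[overlaps.argmax(dim=0)] = 1          # the best anchor for each gt is fg too
        return labels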

6 proposal_target_layer_cascade.py

Finds, for the selected RoIs, the ground-truth class and coordinate-transform information needed for training.

[Placeholder; to be continued…]

ref: https://blog.csdn.net/weixin_43872578/article/details/87898070

