1. 加載model及訓練權重
# The class count is one larger than the number of names in class_names
# (presumably the extra slot is the background class -- TODO confirm).
self.num_classes = len(self.class_names) + 1
self.model = SSD300((300, 300, 3), self.num_classes)
# by_name=True matches weights to layers by layer name; skip_mismatch=True
# skips layers whose weight shapes do not match instead of raising.
self.model.load_weights('ssd_model.h5', by_name=True, skip_mismatch=True)
# self.model.summary()
print('model loaded')
2. 圖片預處理
將圖片縮放至(300, 300)大小,並進行與訓練過程同樣的預處理(不包括圖像增強)後輸入網絡,得到預測結果
# Add a leading batch axis so the image has shape (1, H, W, 3), where H and W
# come from self.model_image_size.
photo = np.reshape(
    photo,
    [1, self.model_image_size[0], self.model_image_size[1], 3],
)
# Apply the same input normalisation that was used for training.
photo = preprocess_input(photo)
# Run the network and convert the result to a NumPy array.
preds = self.get_pred(photo).numpy()
3. 預測結果後處理
3.1 得到所有預測框的位置
def decode_boxes(self, mbox_loc, mbox_priorbox, variances):
    """Decode predicted offsets into corner-format boxes clipped to [0, 1].

    Args:
        mbox_loc: 2-D array whose columns 0..3 are the predicted
            (center-x, center-y, width, height) offsets for each prior.
        mbox_priorbox: 2-D array whose columns 0..3 are the prior boxes as
            (xmin, ymin, xmax, ymax).
        variances: 2-D array whose columns 0..3 scale the matching offset
            column of ``mbox_loc``.

    Returns:
        Array with one row per prior and columns
        (xmin, ymin, xmax, ymax), each value clamped to the range [0, 1].
    """
    # Width and height of every prior box.
    prior_width = mbox_priorbox[:, 2] - mbox_priorbox[:, 0]
    prior_height = mbox_priorbox[:, 3] - mbox_priorbox[:, 1]

    # Center point of every prior box.
    prior_center_x = 0.5 * (mbox_priorbox[:, 2] + mbox_priorbox[:, 0])
    prior_center_y = 0.5 * (mbox_priorbox[:, 3] + mbox_priorbox[:, 1])

    # Center offsets are expressed in units of the prior's size.
    decode_bbox_center_x = mbox_loc[:, 0] * prior_width * variances[:, 0]
    decode_bbox_center_x += prior_center_x
    decode_bbox_center_y = mbox_loc[:, 1] * prior_height * variances[:, 1]
    decode_bbox_center_y += prior_center_y

    # Size offsets are log-scale factors applied to the prior's size.
    decode_bbox_width = np.exp(mbox_loc[:, 2] * variances[:, 2])
    decode_bbox_width *= prior_width
    decode_bbox_height = np.exp(mbox_loc[:, 3] * variances[:, 3])
    decode_bbox_height *= prior_height

    # Convert (center, size) to top-left / bottom-right corners.
    decode_bbox_xmin = decode_bbox_center_x - 0.5 * decode_bbox_width
    decode_bbox_ymin = decode_bbox_center_y - 0.5 * decode_bbox_height
    decode_bbox_xmax = decode_bbox_center_x + 0.5 * decode_bbox_width
    decode_bbox_ymax = decode_bbox_center_y + 0.5 * decode_bbox_height

    # Stack the four coordinate vectors as columns of one array.
    decode_bbox = np.stack(
        (
            decode_bbox_xmin,
            decode_bbox_ymin,
            decode_bbox_xmax,
            decode_bbox_ymax,
        ),
        axis=-1,
    )

    # Keep every coordinate inside the normalised image range.
    return np.clip(decode_bbox, 0.0, 1.0)
decode_bbox 的維度為(8732, 4)
3.2 針對每一個類別將置信度高於一定閾值 (一般設為0.5) 的作為備選框
for c in range(self.num_classes):
    # The background class never produces detections.
    if c == background_label_id:
        continue
    # Scores of class c for every box of sample i.
    c_confs = mbox_conf[i, :, c]
    # Boolean mask of boxes whose score exceeds the threshold.
    c_confs_m = c_confs > confidence_threshold
    if len(c_confs[c_confs_m]) > 0:
        # Keep only the boxes scoring above confidence_threshold.
        boxes_to_process = decode_bbox[c_confs_m]
        confs_to_process = c_confs[c_confs_m]
3.3 採取非極大值抑制過濾重合度較高的框
# IoU-based non-maximum suppression; returns the indices of the boxes to keep
# (at most self._top_k of them).
# NOTE(review): tf.image.non_max_suppression documents boxes as
# (y1, x1, y2, x2); if the boxes here are (xmin, ymin, xmax, ymax) the result
# is the same because IoU is unchanged when x and y are swapped consistently.
idx = tf.image.non_max_suppression(
    tf.cast(boxes_to_process, tf.float32),
    tf.cast(confs_to_process, tf.float32),
    self._top_k,
    iou_threshold=self._nms_thresh,
).numpy()

# Select the boxes and scores that survived suppression.
good_boxes = boxes_to_process[idx]
confs = confs_to_process[idx][:, None]

# Build rows of (label, confidence, box coordinates).
labels = c * np.ones((len(idx), 1))
c_pred = np.concatenate((labels, confs, good_boxes), axis=1)

# Append this class's detections to the current result list.
results[-1].extend(c_pred)
c_pred 的維度為(n , 6), n為c類別框的個數
3.4 按置信度排序,選取top_k個框
results[-1] = np.array(results[-1]) argsort = np.argsort(results[-1][:, 1])[::-1] results[-1] = results[-1][argsort] # 選出置信度最大的keep_top_k個 results[-1] = results[-1][:keep_top_k] return results
4. 將預測結果(框的位置)映射回原圖像大小
def ssd_correct_boxes(top, left, bottom, right, input_shape, image_shape):
    """Map normalised boxes from the network input back to image pixels.

    The computation treats the image as having been scaled to fit inside
    ``input_shape`` with its aspect ratio preserved and centred, and removes
    that offset and scale before converting to pixel units.

    Args:
        top, left, bottom, right: arrays of normalised box edges. They are
            joined along the last axis, so each is expected to have a
            trailing axis of length 1 (e.g. shape ``(n, 1)``) -- TODO confirm
            against the caller.
        input_shape: two values for the network input size, in the same
            (y, x) order as ``image_shape``.
        image_shape: two values for the original image size, ordered to
            match the (y, x) order of the boxes.

    Returns:
        Array whose last axis is (top, left, bottom, right) in pixels of the
        original image.
    """
    # Accept tuples/lists as well as arrays, and force float arithmetic.
    input_shape = np.asarray(input_shape, dtype=np.float64)
    image_shape = np.asarray(image_shape, dtype=np.float64)

    # Size the image occupies inside the network input after aspect-preserving
    # scaling, and the resulting border offset / zoom factor.
    new_shape = image_shape * np.min(input_shape / image_shape)
    offset = (input_shape - new_shape) / 2.0 / input_shape
    scale = input_shape / new_shape

    # Work in (center, size) form so offset and scale apply cleanly.
    box_yx = np.concatenate(((top + bottom) / 2, (left + right) / 2), axis=-1)
    box_hw = np.concatenate((bottom - top, right - left), axis=-1)

    box_yx = (box_yx - offset) * scale
    # Not in-place: an in-place multiply fails when the inputs are integers.
    box_hw = box_hw * scale

    box_mins = box_yx - (box_hw / 2.0)
    box_maxes = box_yx + (box_hw / 2.0)
    boxes = np.concatenate(
        [
            box_mins[:, 0:1],
            box_mins[:, 1:2],
            box_maxes[:, 0:1],
            box_maxes[:, 1:2],
        ],
        axis=-1,
    )

    # Convert normalised coordinates to pixels of the original image.
    return boxes * np.concatenate([image_shape, image_shape], axis=-1)
5. 在原圖像上畫框
for i, c in enumerate(top_label_indices): predicted_class = self.class_names[int(c)-1] score = top_conf[i] top, left, bottom, right = boxes[i] top = top - 5 left = left - 5 bottom = bottom + 5 right = right + 5 top = max(0, np.floor(top + 0.5).astype('int32')) left = max(0, np.floor(left + 0.5).astype('int32')) bottom = min(np.shape(image)[0], np.floor(bottom + 0.5).astype('int32')) right = min(np.shape(image)[1], np.floor(right + 0.5).astype('int32')) # 畫框框 label = '{} {:.2f}'.format(predicted_class, score) draw = ImageDraw.Draw(image) label_size = draw.textsize(label, font) label = label.encode('utf-8') print(label) if top - label_size[1] >= 0: text_origin = np.array([left, top - label_size[1]]) else: text_origin = np.array([left, top + 1]) for i in range(thickness): draw.rectangle( [left + i, top + i, right - i, bottom - i], outline=self.colors[int(c)-1]) draw.rectangle( [tuple(text_origin), tuple(text_origin + label_size)], fill=self.colors[int(c)-1]) draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) del draw return image
