TF項目實戰(SSD目標檢測)-VOC2007
理論詳解:https://blog.csdn.net/u013989576/article/details/73439202
訓練好的模型和代碼會公布在網上(含 VOC數據集 vgg16 模型 以及訓練好的模型):
待續
步驟:
1.代碼地址:https://github.com/balancap/SSD-Tensorflow
2.解壓ssd_300_vgg.ckpt.zip 到checkpoint文件夾下(另外將vgg16模型放在本路徑下)
3.測試一下看看,在notebooks文件夾下創建demo_test.py,其實就是復制ssd_notebook.ipynb中的代碼,該py文件是完成對於單張圖片的測試。
"""Single-image SSD demo: restore the pretrained SSD-300 VGG model and plot detections.

This is the code of ssd_notebook.ipynb as a plain script. It is expected to be
run from the ``notebooks`` directory, because all paths are relative to it.
"""
import os
import math
import random

import numpy as np
import tensorflow as tf
import cv2

slim = tf.contrib.slim
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import sys

# Make the repository root importable when running from notebooks/.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

# TensorFlow session: grow GPU memory on demand instead of grabbing all of it.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model. The `reuse` check comes from the notebook, where the
# cell may be executed more than once in the same interpreter.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)


# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300),
                  num_classes=21):
    """Run the SSD network on one image and return the filtered detections.

    Args:
        img: Image array fed to the uint8 ``(None, None, 3)`` placeholder.
        select_threshold: Minimum class score for a box to be kept.
        nms_threshold: Overlap threshold passed to non-maximum suppression.
        net_shape: Network input shape used when decoding the boxes.
        num_classes: Number of classes including background; 21 for Pascal VOC.
            Change it when the restored model was trained on other classes.

    Returns:
        A ``(rclasses, rscores, rbboxes)`` tuple of arrays, one entry per
        detection that survived thresholding, top-400 sorting and NMS.
    """
    # Run SSD network. The pre-processed image is fetched but not needed here.
    _, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=num_classes, decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes


# Test on some demo image and visualize output.
# Folder that holds the test images.
path = '../demo/'
image_names = sorted(os.listdir(path))
if not image_names:
    # Fail with a readable message instead of a bare IndexError below.
    raise FileNotFoundError('No test images found in ' + path)
# Index of the image to use; -1 selects the last one in sorted order.
# NOTE(review): matplotlib returns float arrays for PNG files, while the
# placeholder is uint8 -- JPEG inputs are presumably expected; confirm.
img = mpimg.imread(os.path.join(path, image_names[-1]))
rclasses, rscores, rbboxes = process_image(img)

# visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
visualization.plt_bboxes(img, rclasses, rscores, rbboxes)
4. 將自己的數據集或者 VOC2007直接放在工程目錄下
5. 修改datasets文件夾中pascalvoc_common.py文件,將訓練類別修改成自己的(這里填寫自己數據集的類別)。本文以兩類為例子:
# Pascal VOC label table: class name -> (class id, category name).
# Id 0 is reserved for the background class.
VOC_LABELS = {
    'none': (0, 'Background'),
    'aeroplane': (1, 'Vehicle'),
    'bicycle': (2, 'Vehicle'),
    'bird': (3, 'Animal'),
    'boat': (4, 'Vehicle'),
    'bottle': (5, 'Indoor'),
    'bus': (6, 'Vehicle'),
    'car': (7, 'Vehicle'),
    'cat': (8, 'Animal'),
    'chair': (9, 'Indoor'),
    'cow': (10, 'Animal'),
    'diningtable': (11, 'Indoor'),
    'dog': (12, 'Animal'),
    'horse': (13, 'Animal'),
    'motorbike': (14, 'Vehicle'),
    'person': (15, 'Person'),
    'pottedplant': (16, 'Indoor'),
    'sheep': (17, 'Animal'),
    'sofa': (18, 'Indoor'),
    'train': (19, 'Vehicle'),
    'tvmonitor': (20, 'Indoor'),
}
# For your own data, replace the table above with your classes, e.g. a
# two-class setup (background plus one object class):
# VOC_LABELS = {
#     'none': (0, 'Background'),
#     'aeroplane': (1, 'Vehicle'),
# }
6. 將圖像數據轉換為tfrecords格式,修改datasets文件夾中的pascalvoc_to_tfrecords.py文件,然后更改文件的83行讀取方式為'rb'(以二進制方式讀取圖片);如果你的圖片不是.jpg格式,也可以在此修改圖片的類型。
另外這個修改
7.運行tf_convert_data.py文件,但是需要傳給它一些參數: 這個文件生成TFrecords文件的代碼
但是該文件需要像類似於linux 命令那樣傳入參數。 pycharm中如何解決呢???
假設我們需要執行:python ./tf_convert_data.py --dataset_name=pascalvoc --dataset_dir=./VOC2007/ --output_name=voc_2007_train --output_dir=./tfrecords_ ,怎么辦呢?
我們可以在 run中的 Edit ... 進入
一、
二、
三、
參數:--dataset_name=pascalvoc --dataset_dir=./VOC2007/ --output_name=voc_2007_train --output_dir=./tfrecords_
然后執行該py文件就ok。
如果出現錯誤(文件夾相關的錯誤),則在工程下建立一個文件夾就可以了。
8.訓練模型train_ssd_network.py文件中修改
None代表一直訓練。
其它需要修改的文件:
① nets/ssd_vgg_300.py (因為使用此網絡結構) ,修改87 和88行的類別
② train_ssd_network.py,修改類別120行,GPU占用量,學習率,batch_size等
③ eval_ssd_network.py 修改類別,66行
④ datasets/pascalvoc_2007.py 根據自己的訓練數據修改整個文件
9.開始訓練
類似於第7步中的 三
訓練的主文件為 train_ssd_network.py
參數為:
--train_dir=./train_model/ --dataset_dir=./tfrecords_/ --dataset_name=pascalvoc_2007 --dataset_split_name=train --model_name=ssd_300_vgg --checkpoint_path=./checkpoints/vgg_16.ckpt --checkpoint_model_scope=vgg_16 --checkpoint_exclude_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box --trainable_scopes=ssd_300_vgg/conv6,ssd_300_vgg/conv7,ssd_300_vgg/block8,ssd_300_vgg/block9,ssd_300_vgg/block10,ssd_300_vgg/block11,ssd_300_vgg/block4_box,ssd_300_vgg/block7_box,ssd_300_vgg/block8_box,ssd_300_vgg/block9_box,ssd_300_vgg/block10_box,ssd_300_vgg/block11_box --save_summaries_secs=60 --save_interval_secs=600 --weight_decay=0.0005 --optimizer=adam --learning_rate=0.001 --learning_rate_decay_factor=0.94 --batch_size=24 --gpu_memory_fraction=0.9
訓練過程
:
10 測試:
先看效果
另外我修改了demo_test文件,調取電腦攝像頭來執行代碼。
如果單獨看一張圖的效果則執行函數 demo();如果要處理視頻或攝像頭畫面則執行函數 Load_video_show():
代碼如下:
"""SSD demo: detect objects in a single image or in a video / webcam stream.

The script is expected to be run from the ``notebooks`` directory, because all
paths are relative to it. The entry point is selected at the bottom of the file.
"""
__author__ = "WSX"
import os
import math
import random

import numpy as np
import tensorflow as tf
import cv2

slim = tf.contrib.slim
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import sys

# Make the repository root importable when running from notebooks/.
sys.path.append('../')
from nets import ssd_vgg_300, ssd_common, np_methods
from preprocessing import ssd_vgg_preprocessing
from notebooks import visualization

# TensorFlow session: grow GPU memory on demand instead of grabbing all of it.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder.
net_shape = (300, 300)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model. The `reuse` check comes from the notebook, where the
# cell may be executed more than once in the same interpreter.
reuse = True if 'ssd_net' in locals() else None
ssd_net = ssd_vgg_300.SSDNet()
with slim.arg_scope(ssd_net.arg_scope(data_format=data_format)):
    predictions, localisations, _, _ = ssd_net.net(image_4d, is_training=False, reuse=reuse)

# Restore SSD model.
ckpt_filename = '../checkpoints/ssd_300_vgg.ckpt'
# ckpt_filename = '../checkpoints/VGG_VOC0712_SSD_300x300_ft_iter_120000.ckpt'
isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, ckpt_filename)

# SSD default anchor boxes.
ssd_anchors = ssd_net.anchors(net_shape)


# Main image processing routine.
def process_image(img, select_threshold=0.5, nms_threshold=.45, net_shape=(300, 300),
                  num_classes=21):
    """Run the SSD network on one image and return the filtered detections.

    Args:
        img: Image array fed to the uint8 ``(None, None, 3)`` placeholder.
        select_threshold: Minimum class score for a box to be kept.
        nms_threshold: Overlap threshold passed to non-maximum suppression.
        net_shape: Network input shape used when decoding the boxes.
        num_classes: Number of classes including background; 21 for Pascal VOC.
            Change it when the restored model was trained on other classes.

    Returns:
        A ``(rclasses, rscores, rbboxes)`` tuple of arrays, one entry per
        detection. Callers in this file read each box as
        ``(ymin, xmin, ymax, xmax)`` scaled by the image height and width.
    """
    # Run SSD network. The pre-processed image is fetched but not needed here.
    _, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})

    # Get classes and bboxes from the net outputs.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=num_classes, decode=True)

    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    # Resize bboxes to original image shape. Note: useless for Resize.WARP!
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes


# =========================== Test section ===========================
# --------------------------- Single image ---------------------------
def demo():
    """Detect objects in the last image of ``../demo/`` and plot the result.

    Raises:
        FileNotFoundError: If the demo folder contains no files.
    """
    # Folder that holds the test images.
    path = '../demo/'
    image_names = sorted(os.listdir(path))
    if not image_names:
        # Fail with a readable message instead of a bare IndexError below.
        raise FileNotFoundError('No test images found in ' + path)
    # Index of the image to use; -1 selects the last one in sorted order.
    img = mpimg.imread(os.path.join(path, image_names[-1]))
    print(img.shape)
    rclasses, rscores, rbboxes = process_image(img)

    # visualization.bboxes_draw_on_img(img, rclasses, rscores, rbboxes, visualization.colors_plasma)
    visualization.plt_bboxes(img, rclasses, rscores, rbboxes)


# ====================== Real-time display on video ======================
# Class names indexed by class id (index 0 is the background class).
L = ["None","aeroplane","bicycle","bird ","boat ","bottle ","bus ","car ","cat ","chair","cow ","diningtable","dog","horse","motorbike","person",
     "pottedplant","sheep","sofa","train","tvmonitor"]
# One RGB colour per class id.
colors_tableau = [(255, 255, 255), (31, 119, 180), (174, 199, 232), (255, 127, 14), (255, 187, 120),
                  (44, 160, 44), (152, 223, 138), (214, 39, 40), (255, 152, 150),
                  (148, 103, 189), (197, 176, 213), (140, 86, 75), (196, 156, 148),
                  (227, 119, 194), (247, 182, 210), (127, 127, 127), (199, 199, 199),
                  (188, 189, 34), (219, 219, 141), (23, 190, 207), (158, 218, 229)]


def Load_video_show():
    """Run detection frame by frame on a video and show the annotated frames.

    Reads ``1.mp4``; pass ``0`` to ``cv2.VideoCapture`` instead to use the
    webcam. The loop ends when the stream is exhausted or ESC is pressed.

    Raises:
        IOError: If the video source cannot be opened.
    """
    source = "1.mp4"  # 0 selects the webcam; a file path loads a video.
    video = cv2.VideoCapture(source)
    if not video.isOpened():
        raise IOError("Cannot open video source: {}".format(source))

    # The window must exist (and be resizable) before it can be resized.
    cv2.namedWindow("video", cv2.WINDOW_NORMAL)
    cv2.resizeWindow("video", 640, 360)
    try:
        while True:
            ret, frame = video.read()
            if not ret:
                # End of stream or read failure: `frame` is None here.
                break
            frame = cv2.flip(frame, 1)  # Mirror the picture horizontally.
            frame = cv2.resize(frame, (640, 360))
            # OpenCV decodes frames as BGR, whereas demo() feeds RGB arrays
            # from matplotlib; convert so both paths use the same order.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            rclasses, rscores, rbboxes = process_image(rgb_frame)

            height, width = frame.shape[:2]
            for cls, score, box in zip(rclasses, rscores, rbboxes):
                cls_id = int(cls)
                if cls_id < 0:
                    continue
                ymin = int(box[0] * height)
                xmin = int(box[1] * width)
                ymax = int(box[2] * height)
                xmax = int(box[3] * width)
                # The table is RGB while OpenCV draws in BGR, hence the reversal.
                color = colors_tableau[cls_id % len(colors_tableau)][::-1]
                cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, 2)
                # Fall back to the numeric id for classes outside the table.
                class_name = L[cls_id] if cls_id < len(L) else str(cls_id)
                cv2.putText(frame, '{:s} | {:.3f}'.format(class_name, score),
                            (xmin, max(ymin - 2, 12)),
                            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 255, 0), 1)
            cv2.imshow("video", frame)
            # Mask to the low byte so the ESC check works on every platform.
            if cv2.waitKey(50) & 0xFF == 27:
                break
    finally:
        video.release()
        cv2.destroyAllWindows()


# Swap the two calls below to run the video demo instead of the image demo.
#Load_video_show()
demo()