Log
Test environment:
GPU: TITAN Xp
CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
OS: Ubuntu 16.04
Anaconda: conda 4.6.11
Python: 3.5.6
Tensorflow: 1.10.0
Data preparation
Update:
For generating the TFRecords, I later found another approach online: run the following two scripts separately on the training set and the test set to obtain train.record and val.record.
# xml2csv.py
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
os.chdir('/root/proj_emotor/data/VOCdevkit/VOC2012/Annotations')
path ='/root/proj_emotor/data/VOCdevkit/VOC2012/Annotations'
def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df


def main():
    image_path = path
    xml_df = xml_to_csv(image_path)
    xml_df.to_csv('emotor_train.csv', index=None)
    print('Successfully converted xml to csv.')


main()
# generate_tfrecord.py
# -*- coding: utf-8 -*-
"""
Usage:
# From tensorflow/models/
# Create train data:
python generate_tfrecord.py --csv_input=data/tv_vehicle_labels.csv --output_path=train.record
# Create test data:
python generate_tfrecord.py --csv_input=data/test_labels.csv --output_path=test.record
"""
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict
os.chdir('/root/proj_emotor/data')
flags = tf.app.flags
flags.DEFINE_string('csv_input', '', 'Path to the CSV input')
flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
FLAGS = flags.FLAGS
# TO-DO replace this with label map
def class_text_to_int(row_label):
    if row_label == 'emotor':  # change this to your own label
        return 1
    else:
        return None


def split(df, group):
    data = namedtuple('data', ['filename', 'object'])
    gb = df.groupby(group)
    return [data(filename, gb.get_group(x)) for filename, x in zip(gb.groups.keys(), gb.groups)]
def create_tf_example(group, path):
    with tf.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    encoded_jpg_io = io.BytesIO(encoded_jpg)
    image = Image.open(encoded_jpg_io)
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for index, row in group.object.iterrows():
        xmins.append(row['xmin'] / width)
        xmaxs.append(row['xmax'] / width)
        ymins.append(row['ymin'] / height)
        ymaxs.append(row['ymax'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class']))

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(filename),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example
def main(_):
    writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
    path = os.path.join(os.getcwd(), '/root/proj_emotor/data/VOCdevkit/VOC2012/JPEGImages')  # change to your image directory
    examples = pd.read_csv(FLAGS.csv_input)
    grouped = split(examples, 'filename')
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())

    writer.close()
    output_path = os.path.join(os.getcwd(), FLAGS.output_path)
    print('Successfully created the TFRecords: {}'.format(output_path))


if __name__ == '__main__':
    tf.app.run()
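For this project the two scripts are then run once per split, roughly as follows (the CSV names are illustrative; xml2csv.py as written always writes emotor_train.csv, so adjust the annotation path and output name for each split):
# From /root/proj_emotor/data
python generate_tfrecord.py --csv_input=emotor_train.csv --output_path=train.record
python generate_tfrecord.py --csv_input=emotor_val.csv --output_path=val.record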
Original approach
There are 1,700 unlabeled images in total; 800 were randomly selected for an initial test.
They were annotated manually with LabelImg, with the object label name set to emotor.
During annotation, images 1-400 sat in a folder named result and images 401-800 in a folder named ImageSets, so the resulting .xml files contain attributes such as:
<folder>result</folder>
<filename>1.jpg</filename>
<path>F:\result\1.jpg</path>
<folder>ImageSets</folder>
<filename>405.jpg</filename>
<path>/home/hzq0/VOC2012/ImageSets/405.jpg</path>
The
<folder>ImageSets</folder>
entry affects TFRecord generation, as explained later.
Put all the resulting .xml files into the Annotations folder, images 1-400 into the result folder, and images 401-800 into the ImageSets folder.
Then run the following script:
import os
import random
trainval_percent = 1
train_percent = 0.5
xmlfilepath = 'VOC2012\VOC2012\Annotations'
txtsavepath = 'VOC2012\VOC2012\ImageSets\Main'
total_xml = os.listdir(xmlfilepath)

num = len(total_xml)
list = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list, tv)
train = random.sample(trainval, tr)

ftrainval = open('VOC2012\VOC2012\ImageSets\Main\\trainval.txt', 'w')
ftest = open('VOC2012\VOC2012\ImageSets\Main\\test.txt', 'w')
ftrain = open('VOC2012\VOC2012\ImageSets\Main\\train.txt', 'w')
fval = open('VOC2012\VOC2012\ImageSets\Main\\val.txt', 'w')

for i in list:
    name = total_xml[i][:-4] + ' ' + '1' + '\n'
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
This creates the four .txt files under ImageSets/Main; adjust the file paths if you run the script elsewhere.
When creating the TFRecords it turned out that only two of the four .txt files are actually used; how to modify them is explained below. Each generated line has the format shown in the sample after this paragraph.
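Each line of the generated files is just the image name stem followed by a 1 (the per-class presence flag that PASCAL-style image-set files carry), so train.txt contains lines like (ids illustrative):
1 1
405 1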
Once the required folders are ready, copy the script create_pascal_tf_record.py and the file pascal_label_map.pbtxt into your own working directory, and rename pascal_label_map.pbtxt to emotor_label_map.pbtxt.
Folder structure
My working directory is called proj_emotor; the project structure for the data-preparation stage is:
+ proj_emotor:
  + data:
    - create_pascal_tf_record.py
    - emotor_label_map.pbtxt
    + VOCdevkit:
      + ImageSets:
        + JPEGImages:
          - 401~800.jpg
      + result:
        + JPEGImages:
          - 1~400.jpg
      + VOC2012:
        + Annotations:
          - .xml
        + ImageSets:
          + Main:
            - emotor_train.txt
            - emotor_val.txt
        + JPEGImages:
          - all pics
So far none of the configuration files or scripts have been modified; now a few changes are needed.
First, replace the contents of emotor_label_map.pbtxt with:
item {
id: 1
name: 'emotor'
}
Then, in line 165 of create_pascal_tf_record.py, change aeroplane_ to emotor_.
Looking closely at the directory tree: of the original four .txt files in ImageSets/Main, only train.txt and val.txt are kept, and both have the emotor_ prefix added. The reason is that same line 165 of create_pascal_tf_record.py, sketched below.
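In the copy of create_pascal_tf_record.py I worked from, the line in question builds the image-set file name from a hard-coded class prefix (the exact line number and wording may differ between versions), so the edit looks roughly like this:
# before (original script):
examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                             'aeroplane_' + FLAGS.set + '.txt')
# after:
examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
                             'emotor_' + FLAGS.set + '.txt')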
Once these changes are made, the script can be run.
Before that, one more explanation of why VOCdevkit contains the extra result and ImageSets folders.
When labeling with LabelImg, the first 400 images were labeled inside the result folder and the last 400 inside ImageSets (a mistake; the intention was to name that folder VOC2012). As mentioned earlier, the resulting .xml files therefore contain the line
<folder>ImageSets</folder>
Because of this extra information, TFRecord creation first goes to the folder named there and then looks for JPEGImages inside it. So in the future, images should be moved into the VOC2012 folder before labeling. The path construction in create_pascal_tf_record.py that causes this is sketched below.
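Roughly, dict_to_tf_example() in create_pascal_tf_record.py builds each image path from the <folder> field of the annotation (sketched from the version I used; details may vary):
# inside dict_to_tf_example() in create_pascal_tf_record.py:
# data['folder'] comes straight from the <folder> tag of the .xml file,
# so the image is looked up under <data_dir>/<folder>/JPEGImages/<filename>.
img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
full_path = os.path.join(dataset_directory, img_path)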
Creating the TFRecords
# From proj_emotor/data
python create_pascal_tf_record.py \
    --label_map_path=emotor_label_map.pbtxt \
    --data_dir=VOCdevkit --year=VOC2012 --set=train \
    --output_path=emotor_train.record

python create_pascal_tf_record.py \
    --label_map_path=emotor_label_map.pbtxt \
    --data_dir=VOCdevkit --year=VOC2012 --set=val \
    --output_path=emotor_val.record
Note the directory from which these commands are run. On success, two files appear in the data directory:
emotor_train.record
emotor_val.record
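As a quick sanity check, the number of examples written to each record can be counted with the TF 1.x record iterator:
import tensorflow as tf

# Count the tf.Example records in each generated file.
for record in ['emotor_train.record', 'emotor_val.record']:
    count = sum(1 for _ in tf.python_io.tf_record_iterator(record))
    print(record, count)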
Training
Preparation
First, download an official pre-trained model from the TensorFlow Model Zoo.
I used ssd_mobilenet_v2_coco and extracted it under proj_emotor/models/model.
The folder structure under proj_emotor is now:
+ proj_emotor
  + data:
    ...
  + models:
    - model_main.py
    - pipeline.config
    + model:
      + train
      + eval
      + ssd_mobilenet_v2_coco_2018_03_29
Here model_main.py is copied from its original location in the Object Detection API (object_detection/model_main.py), and pipeline.config is copied from the extracted ssd_mobilenet_v2_coco_2018_03_29/pipeline.config.
After editing, its contents are:
model {
ssd {
num_classes: 1
image_resizer {
fixed_shape_resizer {
height: 300
width: 300
}
}
feature_extractor {
type: "ssd_mobilenet_v2"
depth_multiplier: 1.0
min_depth: 16
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 3.99999989895e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.0299999993294
}
}
activation: RELU_6
batch_norm {
decay: 0.999700009823
center: true
scale: true
epsilon: 0.0010000000475
train: true
}
}
batch_norm_trainable: true
use_depthwise: true
}
box_coder {
faster_rcnn_box_coder {
y_scale: 10.0
x_scale: 10.0
height_scale: 5.0
width_scale: 5.0
}
}
matcher {
argmax_matcher {
matched_threshold: 0.5
unmatched_threshold: 0.5
ignore_thresholds: false
negatives_lower_than_unmatched: true
force_match_for_each_row: true
}
}
similarity_calculator {
iou_similarity {
}
}
box_predictor {
convolutional_box_predictor {
conv_hyperparams {
regularizer {
l2_regularizer {
weight: 3.99999989895e-05
}
}
initializer {
truncated_normal_initializer {
mean: 0.0
stddev: 0.0299999993294
}
}
activation: RELU_6
batch_norm {
decay: 0.999700009823
center: true
scale: true
epsilon: 0.0010000000475
train: true
}
}
min_depth: 0
max_depth: 0
num_layers_before_predictor: 0
use_dropout: false
dropout_keep_probability: 0.800000011921
kernel_size: 3
box_code_size: 4
apply_sigmoid_to_scores: false
}
}
anchor_generator {
ssd_anchor_generator {
num_layers: 6
min_scale: 0.20000000298
max_scale: 0.949999988079
aspect_ratios: 1.0
aspect_ratios: 2.0
aspect_ratios: 0.5
aspect_ratios: 3.0
aspect_ratios: 0.333299994469
}
}
post_processing {
batch_non_max_suppression {
score_threshold: 0.300000011921
iou_threshold: 0.600000023842
max_detections_per_class: 100
max_total_detections: 100
}
score_converter: SIGMOID
}
normalize_loss_by_num_matches: true
loss {
localization_loss {
weighted_smooth_l1 {
}
}
classification_loss {
weighted_sigmoid {
}
}
hard_example_miner {
num_hard_examples: 3000
iou_threshold: 0.990000009537
loss_type: CLASSIFICATION
max_negatives_per_positive: 3
min_negatives_per_image: 3
}
classification_weight: 1.0
localization_weight: 1.0
}
}
}
train_config {
batch_size: 24
data_augmentation_options {
random_horizontal_flip {
}
}
data_augmentation_options {
ssd_random_crop {
}
}
optimizer {
rms_prop_optimizer {
learning_rate {
exponential_decay_learning_rate {
initial_learning_rate: 0.00400000018999
decay_steps: 800720
decay_factor: 0.949999988079
}
}
momentum_optimizer_value: 0.899999976158
decay: 0.899999976158
epsilon: 1.0
}
}
fine_tune_checkpoint: "/root/proj_emotor/model/ssd_mobilenet_v2_coco_2018_03_29/model.ckpt"
num_steps: 200000
fine_tune_checkpoint_type: "detection"
}
train_input_reader {
label_map_path: "/root/proj_emotor/data/emotor_label_map.pbtxt"
tf_record_input_reader {
input_path: "/root/proj_emotor/data/emotor_train.record"
}
}
eval_config {
num_examples: 8000
max_evals: 10
use_moving_averages: false
}
eval_input_reader {
label_map_path: "/root/proj_emotor/data/emotor_label_map.pbtxt"
shuffle: false
num_readers: 1
tf_record_input_reader {
input_path: "/root/proj_emotor/data/emotor_val.record"
}
}
Start training
# From the proj_emotor/models directory
python model_main.py \
--pipeline_config_path=pipeline.config \
--model_dir=model \
--num_train_steps=80000 \
--sample_1_of_n_eval_examples=1 \
--alsologtostderr
During training, TensorBoard can be used to watch the progress in real time.
tensorboard --logdir={PATH TO LOG}
The log location here is the directory where training writes its event files, i.e. the --model_dir passed to model_main.py (model in this setup).
After training finishes, the model folder contains the following:
model
├── checkpoint
├── eval
│   └── events.out.tfevents.1563524306.hzq
├── events.out.tfevents.1563523648.hzq
├── export
│   └── Servo_0
│       └── 1563551359
│           ├── saved_model.pb
│           └── variables
│               ├── variables.data-00000-of-00001
│               └── variables.index
├── graph.pbtxt
├── model.ckpt-74690.data-00000-of-00001
├── model.ckpt-74690.index
├── model.ckpt-74690.meta
├── model.ckpt-76420.data-00000-of-00001
├── model.ckpt-76420.index
├── model.ckpt-76420.meta
├── model.ckpt-78144.data-00000-of-00001
├── model.ckpt-78144.index
├── model.ckpt-78144.meta
├── model.ckpt-79957.data-00000-of-00001
├── model.ckpt-79957.index
├── model.ckpt-79957.meta
├── model.ckpt-80000.data-00000-of-00001
├── model.ckpt-80000.index
├── model.ckpt-80000.meta
├── pipeline.config
└── train
Exporting a model for inference
python export_inference_graph.py \
--input_type=image_tensor \
--pipeline_config_path=./model/pipeline.config \
--trained_checkpoint_prefix=/root/proj_emotor/models/model/model.ckpt-80000 \
--output_directory=./model/train/
Here --pipeline_config_path points to the pipeline.config produced by training (the one inside model/), not the pipeline.config used to launch training.
train
├── checkpoint
├── frozen_inference_graph.pb
├── model.ckpt.data-00000-of-00001
├── model.ckpt.index
├── model.ckpt.meta
├── pipeline.config
└── saved_model
    ├── saved_model.pb
    └── variables
The train folder now contains frozen_inference_graph.pb, the frozen graph used for inference.
Inference
The inference code:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops
if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')
# This is needed to display the images.
#%matplotlib inline
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util
# What model to download.
MODEL_NAME = '/root/proj_emotor/models/model/train/'
#MODEL_FILE = MODEL_NAME + '.tar.gz'
#DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + 'frozen_inference_graph.pb'
# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('/root/proj_emotor/data', 'emotor_label_map.pbtxt')
#opener = urllib.request.URLopener()
#opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
#tar_file = tarfile.open(MODEL_FILE)
#for file in tar_file.getmembers():
# file_name = os.path.basename(file.name)
# if 'frozen_inference_graph.pb' in file_name:
# tar_file.extract(file, os.getcwd())
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'TEST_IMGs'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, '{}.jpg'.format(i)) for i in range(1, 100) ]
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)
def run_inference_for_single_image(image, graph):
    with graph.as_default():
        with tf.Session() as sess:
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0], image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(tensor_dict,
                                   feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.uint8)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
for image_path in TEST_IMAGE_PATHS:
    image = Image.open(image_path)
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    plt.show()
Problems
1
google.protobuf.text_format.ParseError: 35:7 : Message type "object_detection.protos.SsdFeatureExtractor" has no field named "batch_norm_trainable"
This was resolved by deleting
batch_norm_trainable: true
from pipeline.config, but I do not know what side effects this has; I had not run into the error before.
Explanation
In the earlier experiments pipeline.config was copied from samples/configs/ssd_mobilenet_v2_coco.config, which does not contain this option. What exactly the option does is still unclear to me.
2
After training starts, some parameters cannot be initialized from the checkpoint:
Use `tf.data.Dataset.batch(..., drop_remainder=True)`.
W0718 14:56:12.331554 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/beta] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.331802 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/gamma] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.331923 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/moving_mean] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.332051 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/BatchNorm/moving_variance] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1280]], model variable shape: [[256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.332157 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/Conv_1/weights] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1, 1, 320, 1280]], model variable shape: [[1, 1, 320, 256]]. This variable will not be initialized from the checkpoint.
W0718 14:56:12.336651 139700418553600 variables_helper.py:141] Variable [FeatureExtractor/MobilenetV2/layer_19_1_Conv2d_2_1x1_256/weights] is available in checkpoint, but has an incompatible shape with model variable. Checkpoint shape: [[1, 1, 1280, 256]], model variable shape: [[1, 1, 256, 256]]. This variable will not be initialized from the checkpoint.
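One way to investigate is to list the variables stored in the pretrained checkpoint and compare their shapes with the ones reported above (a sketch; the path is the fine_tune_checkpoint from pipeline.config):
import tensorflow as tf

# Print name and shape of the checkpoint variables mentioned in the warnings,
# to compare against the shapes the current model expects.
ckpt = '/root/proj_emotor/model/ssd_mobilenet_v2_coco_2018_03_29/model.ckpt'
for name, shape in tf.train.list_variables(ckpt):
    if 'Conv_1' in name or 'layer_19_1' in name:
        print(name, shape)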