機器學習實戰-微調google vgg模型識別貓狗


一、讀取數據,將數據轉化為TFrecord格式

import tensorflow as tf 
import numpy as np 
import os
import cv2

#數據集所在的位置
DATASET_DIR='/home/zhou/Desktop/kaggle/train/'
TRAIN_OUTPUT_FILENAME='/home/zhou/Desktop/kaggle/train.tfrecords'
VAL_OUTPUT_FILENAME='/home/zhou/Desktop/kaggle/val.tfrecords'

def _float_feature(value):
  """Returns a float_list from a float / double."""
  return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))
def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))
def _int64_feature(value):
	return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


#讀取訓練樣本的圖片序列以及對應的label
def get_filename_and_class(dataset_dir):
	photo_names = []
	class_names= []
	for filename in os.listdir(dataset_dir):
		path=dataset_dir+filename
		label=filename.split('.')[0]
		photo_names.append(path)
		class_names.append(label)
	return photo_names,class_names
#將label轉化為0,1形式
def convert_class_names_to_label(class_names):
	label=[0 if names=='dog' else 1  for names in class_names]
	return label

#讀取數據並將數據轉化為tfrecords
def convert_data_to_tfrecords(photo_names,label,output_filename):
	with tf.python_io.TFRecordWriter(output_filename) as writer:
		for filename ,label in zip(photo_names,label):
			try:
				image_data=cv2.imread(filename)
				image_shape=image_data.shape
				image_data=image_data.tostring()
				features = {
				'filename': _bytes_feature(filename.encode('utf-8')),
				'rows': _int64_feature(image_shape[0]),
				'cols': _int64_feature(image_shape[1]),
				'channels': _int64_feature(image_shape[2]),
      			'label': tf.train.Feature(int64_list = tf.train.Int64List(value=label_one_hot[label])),
      			'image_data': _bytes_feature(image_data),
  				}
				example = tf.train.Example(features=tf.train.Features(feature=features))
				writer.write(example.SerializeToString())
			except IOError as e:
				print("Could not read:",filename)
				print("Error:",e)
				print("Skip it\n")

#讀取數據並將數據轉化為tfrecords,這種方式生成的文件要遠小於上一種方法
def small_convert_data_to_tfrecords(photo_names,label,output_filename):
	with tf.python_io.TFRecordWriter(output_filename) as writer:
		for filename ,label in zip(photo_names,label):
			try:
				image_shape=cv2.imread(filename).shape
				with tf.gfile.GFile(filename, 'rb') as fid:
					image_data = fid.read()
				features = {
				'filename': _bytes_feature(filename.encode('utf-8')),
				'rows': _int64_feature(image_shape[0]),
				'cols': _int64_feature(image_shape[1]),
				'channels': _int64_feature(image_shape[2]),
      			'label': tf.train.Feature(int64_list = tf.train.Int64List(value=label_one_hot[label])),
      			'image_data': _bytes_feature(image_data),
  				}
				example = tf.train.Example(features=tf.train.Features(feature=features))
				writer.write(example.SerializeToString())
			except IOError as e:
				print("Could not read:",filename)
				print("Error:",e)
				print("Skip it\n")


#此處還可以根據photo_names將數據分為兩部分,80%的用於訓練,20%用於交叉驗證(調參用於獲得最佳超參數)
#可以建立專門的函數生成這種dict格式
label_one_hot={'dog':[0,1],'cat':[1,0]}
photo_names,class_names=get_filename_and_class(DATASET_DIR)
length = len(photo_names)
#划分訓練集和交叉驗證集
train_photo_names=photo_names[:int(0.8*length)]
train_class_names=class_names[:int(0.8*length)]
val_photo_names=photo_names[int(0.8*length):]
val_class_names=class_names[int(0.8*length):]
small_convert_data_to_tfrecords(train_photo_names,train_class_names,TRAIN_OUTPUT_FILENAME)
small_convert_data_to_tfrecords(val_photo_names,val_class_names,VAL_OUTPUT_FILENAME)

一、從TFrecord文件讀取數據,微調並訓練Vgg模型

import tensorflow as tf 
import numpy as np 
import cv2
import tensorflow.contrib.slim.nets
import tensorflow.contrib.slim as slim
from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.contrib.layers.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import variable_scope
from tensorflow.python.framework import ops

SMALL_OUTPUT_FILENAME = '/home/zhou/Desktop/kaggle/small_train.tfrecords'
OUTPUT_FILENAME = '/home/zhou/Desktop/kaggle/train.tfrecords'
VAL_FILENAME = '/home/zhou/Desktop/kaggle/val.tfrecords'
MODEL_PATH = '/home/zhou/Desktop/kaggle/vgg_16.ckpt'
VGG_MEAN = [123.68, 116.78, 103.94]
learn_rating=0.0001
def extract_fn(tfrecords):
	features = {
            'filename': tf.FixedLenFeature([], tf.string),
            'rows': tf.FixedLenFeature([], tf.int64),
            'cols': tf.FixedLenFeature([], tf.int64),
            'channels': tf.FixedLenFeature([], tf.int64),
            'image_data': tf.FixedLenFeature([], tf.string),
            'label': tf.FixedLenFeature([2], tf.int64)
	}
	sample = tf.parse_single_example(tfrecords, features)
	#對圖片數據進行解碼,對大文件操作建議先shuffle之后再去decode,不然shuffle的buffer會爆掉。
	#如果用OUTPUT_ILENAME,此處函數應該改為 tf.decode_raw,因為其讀取方式為cv2.imread.而SMALL_OUTPUT_FILENAME讀取方式為tf.gfile.GFile
	image = tf.image.decode_image(sample['image_data']) 
	img_shape = tf.stack([sample['rows'], sample['cols'], sample['channels']])
	image = tf.reshape(image,img_shape)
	#此處可以對圖片進行各種處理,resize等操作,使用resize_images時除了ResizeMethod.NEAREST_NEIGHBOR方法外
	#其他方法都會生成帶有小數的灰度值,在使用imshow等方法顯示圖片的時候只會顯示小數部分,所以需要對數據進行a[0].astype(np.uint8)處理
	image = tf.image.resize_images(image, [224,224], method=tf.image.ResizeMethod.BILINEAR ,align_corners=True)
	#vgg模型並沒有對圖像進行normalization,In the code author subtract mean from each channel 其中VGG_MEAN = [123.68, 116.78, 103.94]
	means = tf.reshape(tf.constant(VGG_MEAN),[1,1,3])
	image = tf.subtract(image,means)
	#image = image - means
	#image = tf.image.resize_image_with_pad(image,224,224)
	label = sample['label']
	filename = sample['filename']
	return [image,label,filename]

def check_accuracy(sess, correct_prediction, is_training, dataset_init_op):
    """
    Check the accuracy of the model on either train or val (depending on dataset_init_op).
    """
    # Initialize the correct dataset
    sess.run(dataset_init_op)
    num_correct, num_samples = 0, 0
    while True:
        try:
            correct_pred = sess.run(correct_prediction, {is_training: False})
            num_correct += correct_pred.sum()
            num_samples += correct_pred.shape[0]
        except tf.errors.OutOfRangeError:
            break

    # Return the fraction of datapoints that were correctly classified
    acc = float(num_correct) / num_samples
    return acc

#tensorflow 官網上有預先訓練好的模型參數,下載對應模型的checkpoint文件即可
#可以通過通過更改vgg_network網絡結構來fune tuning,vgg訓練的image size為224*224
#tf.contrib.layers.conv2d(inputs,num_outputs,kernel_size,stride=1,padding='SAME',data_format=None,rate=1,activation_fn=tf.nn.relu,
#normalizer_fn=None,normalizer_params=None,weights_initializer=initializers.xavier_initializer(),weights_regularizer=None,
#biases_initializer=tf.zeros_initializer(),biases_regularizer=None,reuse=None,variables_collections=None,outputs_collections=None,
#trainable=True,scope=None) conv2d會對wights,bias進行賦初始值,不需要我們去指定
def vgg_network(inputs,num_classes=1000,is_training=True,dropout_keep_prob=0.5,spatial_squeeze=True,scope='vgg_16'):
	with variable_scope.variable_scope(scope, 'vgg_16', [inputs]) as sc:
		end_points_collection = sc.original_name_scope + '_end_points'
    # Collect outputs for conv2d, fully_connected and max_pool2d.
		with arg_scope([layers.conv2d, layers_lib.fully_connected, layers_lib.max_pool2d],outputs_collections=end_points_collection):
			net = layers_lib.repeat(inputs, 2, layers.conv2d, 64, [3, 3], scope='conv1')
			net = layers_lib.max_pool2d(net, [2, 2], scope='pool1')
			net = layers_lib.repeat(net, 2, layers.conv2d, 128, [3, 3], scope='conv2')
			net = layers_lib.max_pool2d(net, [2, 2], scope='pool2')
			net = layers_lib.repeat(net, 3, layers.conv2d, 256, [3, 3], scope='conv3')
			net = layers_lib.max_pool2d(net, [2, 2], scope='pool3')
			net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv4')
			net = layers_lib.max_pool2d(net, [2, 2], scope='pool4')
			net = layers_lib.repeat(net, 3, layers.conv2d, 512, [3, 3], scope='conv5')
			net = layers_lib.max_pool2d(net, [2, 2], scope='pool5')
			# Use conv2d instead of fully_connected layers.
			net = layers.conv2d(net, 4096, [7, 7], padding='VALID', scope='fc6')
			net = layers_lib.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout6')
			net = layers.conv2d(net, 4096, [1, 1], scope='fc7')
			net = layers_lib.dropout(net, dropout_keep_prob, is_training=is_training, scope='dropout7')
			net = layers.conv2d(net,num_classes, [1, 1],activation_fn=None,normalizer_fn=None,scope='fc8')
			# Convert end_points_collection into a end_point dict.
			end_points = utils.convert_collection_to_dict(end_points_collection)
			with tf.name_scope('wights'):
				wights = tf.get_default_graph().get_tensor_by_name("vgg_16/conv1/conv1_1/weights:0")
				print(wights)
			tf.summary.histogram('wights', wights)
			#print(end_points)
			if spatial_squeeze:
				net = array_ops.squeeze(net, [1, 2], name='fc8/squeezed')
				end_points[sc.name + '/fc8'] = net
			return net, end_points
def main(train_filename,val_filename,model_path,num_epochs,batch_size):
#map函數的作用是對數據dataset中的每個數據都應用函數extract_fn
#map( map_func,num_parallel_calls=None),Maps map_func across the elements of this dataset.
# This transformation applies map_func to each element of this dataset, and returns a new dataset
#  containing the transformed elements, in the same order as they appeared in the input.
	with tf.Graph().as_default() as g :
		is_training = tf.placeholder(tf.bool)
		#讀取訓練集中的數據
		train_dataset = tf.data.TFRecordDataset(train_filename)
		train_dataset = train_dataset.shuffle(buffer_size=26000)
		#train_dataset = train_dataset.repeat(num_epochs)
		train_dataset = train_dataset.map(extract_fn)
		train_dataset = train_dataset.batch(batch_size)
		#讀取交叉驗證集中的數據
		val_dataset = tf.data.TFRecordDataset(val_filename)
		val_dataset = val_dataset.map(extract_fn)
		val_dataset = val_dataset.batch(batch_size)
		#創造一個統一的迭代器,供不同的數據集使用,output_types為dataset的屬性
		iterator = tf.data.Iterator.from_structure(train_dataset.output_types,train_dataset.output_shapes)
		#得到數據集中的元素
		image,label,_ = iterator.get_next()
		#對數據集進行初始化
		train_init_op = iterator.make_initializer(train_dataset)
		val_init_op = iterator.make_initializer(val_dataset)
		logits, _ = vgg_network(image, num_classes=2, is_training=is_training,dropout_keep_prob=0.5)
		#獲得構建的網絡結構中的tensor變量值,最后一層的tensor需要自己賦值,(因為自己的類別只有2個,而vgg默認的為1000個,所以不能從模型中restore)
		#最后一層的初始化可以調用sess.run(tf.global_variables_initializer())來實現
		variables_to_restore = tf.contrib.framework.get_variables_to_restore(exclude=['vgg_16/fc8'])
		fc8_variables = tf.contrib.framework.get_variables('vgg_16/fc8')
		#通過init_fn(sess),will load all the pretrained weights
		init_fn = tf.contrib.framework.assign_from_checkpoint_fn(model_path, variables_to_restore)
		label=tf.cast(label,tf.float32)
		#sparse_softmax_cross_entropy中的label不是one_hot形式,其為[0,num_class)int整數
		#tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
		with tf.name_scope('loss'):
			loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=label,logits=logits))
		tf.summary.scalar('loss', loss)
		#只更新更改的最后一層的weights and bias
		fc8_train_steps = tf.train.AdamOptimizer(learn_rating).minimize(loss,var_list=fc8_variables)
		#更新整個網絡結構的參數
		full_train_steps = tf.train.AdamOptimizer(learn_rating).minimize(loss)
		#計算神經網絡計算出來的預測值最大值的下標
		correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(label, 1))
		accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
		#print(variables_to_restore)
		#tf.get_default_graph().finalize()
	 #Creates an Iterator for enumerating the elements of this dataset.
		with tf.Session() as sess:
			merged = tf.summary.merge_all()
			train_writer = tf.summary.FileWriter('/home/zhou/Desktop/kaggle',sess.graph)
			test_writer = tf.summary.FileWriter('/home/zhou/Desktop/kaggle')
			sess.run(tf.global_variables_initializer())
			init_fn(sess)  # load the pretrained weights
			#只更新修改指定層fc8的參數
			i = 0
			for epochs in range (num_epochs):
				sess.run(train_init_op)
				while True:
					try:
					#summary,_ = sess.run([merged,fc8_train_steps], {is_training: True})
						summary,_ = sess.run([merged,full_train_steps], {is_training: True})
						train_writer.add_summary(summary, i)
					# if i % 10 == 0:
					# 	loss_result = sess.run(loss, {is_training: True})
					# 	print(loss_result)
						i  = i + 1
					except tf.errors.OutOfRangeError:
						break
				train_acc = check_accuracy(sess,correct_prediction,is_training,train_init_op)
				val_acc = check_accuracy(sess,correct_prediction,is_training,val_init_op)
				print('Train accuracy: %f' % train_acc)
				print('Val accuracy: %f\n' % val_acc)

			#更新所有網絡層參數,包括fc8層
			sess.run(train_init_op)
			while True:
				try:
					_ = sess.run(full_train_steps, {is_training: True})
				except tf.errors.OutOfRangeError:
					break
			train_acc = check_accuracy(sess,correct_prediction,is_training,train_init_op)
			val_acc = check_accuracy(sess,correct_prediction,is_training,val_init_op)
			print('Train accuracy: %f' % train_acc)
			print('Val accuracy: %f\n' % val_acc)
main(OUTPUT_FILENAME,VAL_FILENAME,MODEL_PATH,100,8)


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM