完整項目見:Github
完整項目中最終使用了ResNet進行分類,而卷積版本為了提升訓練效果,結構較本篇也略有改動
本節主要介紹進階的卷積神經網絡設計相關內容,數據讀入以及增強在下一節再予介紹
網絡相關參數
輸入24*24的圖片
卷積->relu激活->最大池化->標准化
卷積->relu激活->標准化->最大池化
全連接:reshape尺寸->384
全連接:384->192
全連接:192->10
SoftMax
網絡實現
git clone https://github.com/tensorflow/models.git
cd models/tutorials/image/cifar10
下面是程序:
# Author : Hellcat
# Time : 2017/12/8
import os
import time
import numpy as np
import tensorflow as tf
import cifar10
# import models.tutorials.image.cifar10.cifar10 as cifar10
import cifar10_input
# import models.tutorials.image.cifar10.cifar10_input as cifar10_input
# Directory handed to the cifar10_input readers below as `data_dir`.
data_dir = './cifar-10/'
# Helper from the tutorial's cifar10 module; presumably fetches and unpacks the
# dataset. NOTE(review): where it writes is not visible here - confirm it
# matches `data_dir`.
cifar10.maybe_download_and_extract()
# Number of training iterations run by the training loop.
max_steps = 3000
# Examples per batch; also fixes the first dimension of both placeholders.
batch_size = 128
# The remaining constants are not referenced in the visible part of this script
# (the placeholder shape hard-codes 24 and the last layer hard-codes 10).
IMAGE_SIZE = 24
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
def variable_with_weight_loss(shape, stddev, wl):
    """Create a weight variable and optionally register an L2 penalty for it.

    The variable is initialised from a truncated normal distribution. When
    ``wl`` is not ``None``, the term ``wl * tf.nn.l2_loss(var)`` is added to
    the ``'losses'`` graph collection so that it can later be summed into the
    total loss. Note that a coefficient of ``0`` still registers a
    (zero-valued) term; only ``None`` skips the registration.

    :param shape: shape of the weight tensor
    :param stddev: standard deviation of the truncated normal initialiser
    :param wl: L2 regularisation coefficient, or ``None`` for no penalty
    :return: the created weight variable
    """
    var = tf.Variable(tf.truncated_normal(shape, stddev=stddev))
    if wl is not None:
        weight_loss = tf.multiply(tf.nn.l2_loss(var), wl, name='weight_loss')
        tf.add_to_collection('losses', weight_loss)
    return var
# Read the images and preprocess them.
# Training batches come from `distorted_inputs`, evaluation batches from
# `inputs(eval_data=True, ...)`; both are tensors that must be evaluated with
# sess.run() to obtain concrete batches.
images_train, labels_train = cifar10_input.distorted_inputs(data_dir=data_dir,
batch_size=batch_size)
images_test, labels_test = cifar10_input.inputs(eval_data=True,
data_dir=data_dir,
batch_size=batch_size)
# Input: 24*24 RGB (3-channel) images.
# The batch dimension is fixed to batch_size, so every feed must contain
# exactly batch_size examples.
image_holder = tf.placeholder(tf.float32, [batch_size,24,24,3])
label_holder = tf.placeholder(tf.int32, [batch_size])
# Layer 1: convolution -> ReLU -> max pooling -> local response normalisation.
# 5x5 kernels, 3 input channels, 64 output channels; wl=0. means the registered
# L2 term contributes nothing to the total loss.
weight1 = variable_with_weight_loss(shape=[5,5,3,64],stddev=5e-2,wl=0.)
bias1 = tf.Variable(tf.constant(0.,shape=[64]))
kernel1 = tf.nn.conv2d(image_holder,weight1,[1,1,1,1],padding='SAME')
conv1 = tf.nn.relu(tf.nn.bias_add(kernel1,bias1))
pool1 = tf.nn.max_pool(conv1, ksize=[1,3,3,1], strides=[1,2,2,1],padding='SAME')
norm1 = tf.nn.lrn(pool1,4,bias=1.,alpha=0.001/9.,beta=0.75)
# Layer 2: convolution -> ReLU -> local response normalisation -> max pooling
# (normalisation and pooling are applied in the opposite order to layer 1).
weight2 = variable_with_weight_loss(shape=[5,5,64,64],stddev=5e-2,wl=0.)
bias2 = tf.Variable(tf.constant(0.,shape=[64]))
kernel2 = tf.nn.conv2d(norm1,weight2,[1,1,1,1],padding='SAME')
conv2 = tf.nn.relu(tf.nn.bias_add(kernel2,bias2))
norm2 = tf.nn.lrn(conv2,4,bias=1.,alpha=0.001/9.,beta=0.75)
pool2 = tf.nn.max_pool(norm2, ksize=[1,3,3,1], strides=[1,2,2,1], padding='SAME')
# Fully connected: flattened features -> 384.
# Flatten each example to a vector; -1 lets TensorFlow infer the feature count.
reshape = tf.reshape(pool2,[batch_size,-1])
dim = reshape.get_shape()[1].value # <----- read the statically known size as a Python int
# This layer uses a non-zero L2 coefficient (0.004), so its weights are regularised.
weight3 = variable_with_weight_loss(shape=[dim, 384], stddev=0.04, wl=0.004)
bias3 = tf.Variable(tf.constant(0.1,shape=[384]))
local3 = tf.nn.relu(tf.matmul(reshape,weight3)+bias3)
# Demonstration prints comparing three ways of inspecting the shape.
print('reshape.get_shape()[1]:',
reshape.get_shape()[1],
type(reshape.get_shape()[1])) # <----- question: what is printed here?
print('reshape.get_shape()[1].value:',
reshape.get_shape()[1].value,
type(reshape.get_shape()[1].value)) # <----- question: what is printed here?
print('tf.shape(reshape):',tf.shape(reshape)) # <----- question: what is printed here?
# Fully connected: 384 -> 192.
weight4 = variable_with_weight_loss(shape=[384,192],stddev=0.04,wl=0.004)
bias4 = tf.Variable(tf.constant(0.1,shape=[192]))
# tf.nn.bias_add is a restricted form of tf.add: the bias must be 1-D and is
# broadcast along the last dimension of the value, whereas tf.add performs
# general element-wise addition with broadcasting.
local4 = tf.nn.relu(tf.nn.bias_add(tf.matmul(local3,weight4), bias4))
# Fully connected: 192 -> 10 (one logit per class; no activation is applied here).
# The initialiser stddev is the reciprocal of the layer's input width (192).
weight5 = variable_with_weight_loss(shape=[192,10],stddev=1/192.,wl=0.)
bias5 = tf.Variable(tf.constant(0.,shape=[10]))
logits = tf.add(tf.matmul(local4,weight5),bias5)
def loss(logits, labels):
    """Build the total loss: mean softmax cross-entropy plus collected terms.

    The mean cross-entropy is added to the ``'losses'`` graph collection, and
    every term in that collection (including any weight penalties registered
    earlier) is summed into the returned tensor.

    :param logits: raw network outputs; softmax is applied inside the loss op
    :param labels: integer class labels, one per example
    :return: tensor named ``total_loss``, the sum of the ``'losses'`` collection
    """
    labels = tf.cast(labels, tf.int64)
    # Softmax cross-entropy: the op applies softmax itself, so the network
    # needs no separate softmax layer. It yields one value per example, as the
    # print below shows.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=labels, name='cross_entropy_per_example')
    print('交叉熵:', cross_entropy.get_shape())  # (128,) with a batch size of 128
    cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
    tf.add_to_collection('losses', cross_entropy_mean)
    # tf.add_n() sums a list of tensors element-wise.
    return tf.add_n(tf.get_collection('losses'), name='total_loss')
# Build the total-loss tensor.
# NOTE: this rebinds the name `loss` from the function defined above to its
# result, so the function can no longer be called after this line.
loss = loss(logits, label_holder)
# Adam optimiser with a learning rate of 1e-3.
train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)
# Top-k correctness op; k is 1 here, i.e. the label must be the highest-scoring class.
top_k_op = tf.nn.in_top_k(logits, label_holder, 1)
# InteractiveSession is used so that the initialiser's .run() below needs no
# explicit session argument.
sess = tf.InteractiveSession()
tf.global_variables_initializer().run()
# Training
# Start the queue-runner threads that feed the augmented training batches;
# without them the sess.run() on the input tensors below would never return.
tf.train.start_queue_runners()
# Loop-invariant progress-line template.
format_str = ('step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)')
for step in range(max_steps):
    start_time = time.time()
    # Pull one concrete batch from the input queue, then feed it to the model.
    image_batch, label_batch = sess.run([images_train, labels_train])
    _, loss_value = sess.run([train_op, loss],
                             feed_dict={image_holder: image_batch,
                                        label_holder: label_batch})
    duration = time.time() - start_time
    # Report throughput every 10 steps.
    if step % 10 == 0:
        examples_per_sec = batch_size / duration
        sec_per_batch = float(duration)
        print(format_str % (step, loss_value, examples_per_sec, sec_per_batch))
# Evaluation
num_examples = 10000
import math
# Number of batches needed to cover num_examples (rounded up).
num_iter = int(math.ceil(num_examples / batch_size))
true_count = 0
# NOTE(review): because of the rounding, num_iter * batch_size can exceed
# num_examples; presumably the input queue wraps around, so a few test images
# are counted twice - confirm.
total_sample_count = num_iter * batch_size
step = 0
while step < num_iter:
    image_batch, label_batch = sess.run([images_test, labels_test])
    # One boolean per example: True when the label is the top-1 prediction.
    predictions = sess.run(top_k_op, feed_dict={image_holder: image_batch,
                                                label_holder: label_batch})
    true_count += np.sum(predictions)
    step += 1
# Precision is the accumulated number of correct predictions over all evaluated
# samples (not the last batch's boolean array, which cannot be formatted with
# %.3f).
precision = true_count / total_sample_count
print('precision @ 1 = %.3f' % precision)
TensorFlow使用總結
標准化層使用方法
tf.nn.lrn(conv2,4,bias=1.,alpha=0.001/9.,beta=0.75)
tf.nn.lrn(input,depth_radius=None,bias=None,alpha=None,beta=None,name=None)
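局部響應歸一化原理是仿造生物學上活躍的神經元對相鄰神經元的抑制現象(側抑制),然後根據論文有公式如下:
$$b^{i}_{x,y} = \frac{a^{i}_{x,y}}{\left(k + \alpha \sum_{j=\max(0,\, i-n/2)}^{\min(N-1,\, i+n/2)} \left(a^{j}_{x,y}\right)^{2}\right)^{\beta}}$$
其中 $a^{i}_{x,y}$ 是第 i 個通道在位置 (x, y) 的激活值,N 為通道總數;a,n/2,k,α,β分別表示函數中的input,depth_radius,bias,alpha,beta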
L2正則化添加方法
weight_loss = tf.multiply(tf.nn.l2_loss(var),wl,name='weight_loss')
tf.add_to_collection('losses', weight_loss)
tf.add_n(tf.get_collection('losses'),name='total_loss')
點乘&矩陣乘
tf.multiply和tf.matmul區別
解析:
(1)tf.multiply是點乘,即Returns x * y element-wise.
(2)tf.matmul是矩陣乘法,即Multiplies matrix a by matrix b, producing a * b.
幾種加法
tf.nn.bias_add 是 tf.add 的一個特例:bias 必須是一維張量,且長度要與 value 的最後一個維度一致,相加時沿最後一個維度廣播。
tf.add 則是通用的逐元素加法,支持一般的 broadcasting(廣播機制),兩個操作數只要形狀可以廣播即可,不要求第二個操作數是一維。
因此在給卷積或全連接的輸出加偏置時兩者結果相同,tf.nn.bias_add 只是限制更嚴格、語義更明確。
tf.add_n():多項連加 return tf.add_n(tf.get_collection('losses'),name='total_loss')
softmax交叉熵
tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits,labels=labels, name='cross_entropy_per_example')
先對logits做softmax,再計算每個樣本各自的交叉熵,返回形狀為[batch_size]的張量(labels為整數類別編號,而非one-hot)
輸出top_k准確率
tf.nn.in_top_k(logits, label_holder, 1)
最后一個參數是k
獲取尺寸
tf.shape(x)
tf.shape()中x數據類型可以是tensor,list,array,返回是一個tensor.
x = tf.placeholder(tf.float32, shape=[None, 227, 227, 3])
我們經常會這樣來feed數據,如果在運行的時候想知道None到底是多少,這時候,只能通過tf.shape(x)[0]這種方式來獲得.
tensor.get_shape()
只有tensor有這個方法,返回的是靜態形狀(TensorShape對象,可用as_list()轉成list),運行時才能確定的維度在其中為None.
輸入,
# Compare three ways of inspecting the second dimension of `reshape`.
print('reshape.get_shape()[1]:',
reshape.get_shape()[1],
type(reshape.get_shape()[1]))
print('reshape.get_shape()[1].value:',
reshape.get_shape()[1].value,
type(reshape.get_shape()[1].value)) # `.value` turns the static Dimension into a plain Python int
print('tf.shape(reshape):',tf.shape(reshape))
輸出,
reshape.get_shape()[1]: 2304 <class 'tensorflow.python.framework.tensor_shape.Dimension'>
reshape.get_shape()[1].value: 2304 <class 'int'>
tf.shape(reshape): Tensor("Shape_2:0", shape=(2,), dtype=int32)
張量切片
tf.slice
解析:slice(input_, begin, size, name=None):Extracts a slice from a tensor.
假設input為[[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]],如下所示:
(1)tf.slice(input, [1, 0, 0], [1, 1, 3]) ==> [[[3, 3, 3]]]
(2)tf.slice(input, [1, 0, 0], [1, 2, 3]) ==> [[[3, 3, 3], [4, 4, 4]]]
(3)tf.slice(input, [1, 0, 0], [2, 1, 3]) ==> [[[3, 3, 3]], [[5, 5, 5]]]
tf.cast(tf.strided_slice(record_bytes, [0], [label_bytes]), tf.int32)
在看cifar10的例子的時候,必然會看到一個函數,官方給的文檔注釋長而晦澀,基本等於0.網上也有這個函數,但解釋差勁或者基本沒有解釋,函數的原型是醬紫的.
def strided_slice(input_, begin, end, strides=None, begin_mask=0, end_mask=0, ellipsis_mask=0, new_axis_mask=0, shrink_axis_mask=0, var=None, name=None):
    """Extracts a strided slice from a tensor."""
input = [[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]]
來把輸入變個型,可以看成3維的tensor,從外向內依次為第1,2,3維:
[[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]], [[5, 5, 5], [6, 6, 6]]]
以tf.strided_slice(input, [0,0,0], [2,2,2], [1,2,1])調用為例,start = [0,0,0] , end = [2,2,2], stride = [1,2,1],求一個[start, end)的片段,注意end為開區間(不包含end)
第1維 start = 0 , end = 2, stride = 1, 所以取 0 , 1行,此時的輸出
output1=
[[[1, 1, 1], [2, 2, 2]], [[3, 3, 3], [4, 4, 4]]]
第2維時, start = 0 , end = 2 , stride = 2, 所以只能取0行,此時的輸出
output2=
[[[1, 1, 1]], [[3, 3, 3]]]
第3維的時候,start = 0, end = 2, stride = 1, 可以取第0,1個元素,此時得到的就是最後的輸出
[[[1, 1]], [[3, 3]]]
即最終的輸出形狀為(2, 1, 2):
[[[1, 1]], [[3, 3]]]
類似代碼如下:
import tensorflow as tf

data = [[[1, 1, 1], [2, 2, 2]],
        [[3, 3, 3], [4, 4, 4]],
        [[5, 5, 5], [6, 6, 6]]]
# begin=[0,0,0], end=[1,1,1] with the default stride of 1 keeps only the
# element at index [0][0][0]; the rank of the input is preserved.
x = tf.strided_slice(data, [0, 0, 0], [1, 1, 1])
with tf.Session() as sess:
    print(sess.run(x))  # [[[1]]]

