Basics

Below is Morvan (莫凡)'s explanation of batch normalization:
fc_mean, fc_var = tf.nn.moments(
    Wx_plus_b,
    axes=[0],
    # the dimensions to normalize over; [0] means the batch dimension
    # for image data, pass [0, 1, 2], i.e. take the mean/variance over
    # [batch, height, width]; do not include the channel dimension
)
scale = tf.Variable(tf.ones([out_size]))
shift = tf.Variable(tf.zeros([out_size]))
epsilon = 0.001
Wx_plus_b = tf.nn.batch_normalization(Wx_plus_b, fc_mean, fc_var, shift, scale, epsilon)
# The call above does the following:
# Wx_plus_b = (Wx_plus_b - fc_mean) / tf.sqrt(fc_var + epsilon)
# Wx_plus_b = Wx_plus_b * scale + shift
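For context, `Wx_plus_b` and `out_size` in the snippet above come from the surrounding fully-connected layer. Here is a minimal sketch of that layer, with illustrative sizes and initializers that are assumptions, not part of the original:

```python
import tensorflow as tf

in_size, out_size = 784, 256   # illustrative layer sizes (assumptions)

x = tf.placeholder(tf.float32, [None, in_size])
W = tf.Variable(tf.random_normal([in_size, out_size], stddev=0.1))
b = tf.Variable(tf.zeros([out_size]))
Wx_plus_b = tf.matmul(x, W) + b   # the tensor normalized over axes=[0] above
```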
tf.contrib.layers.batch_norm: a ready-made batch normalization wrapper class
class batch_norm():
    '''Batch normalization layer'''
    def __init__(self, epsilon=1e-5, momentum=0.9, name='batch_norm'):
        '''
        Initialization
        :param epsilon: small constant that guards against division by zero
        :param momentum: decay for the moving averages
        :param name: scope name for the node
        '''
        with tf.variable_scope(name):
            self.epsilon = epsilon
            self.momentum = momentum
            self.name = name

    def __call__(self, x, train=True):
        # a high-level interface that internally calls batch_normalization to normalize x
        return tf.contrib.layers.batch_norm(x,
                                            decay=self.momentum,   # decay for the moving averages
                                            updates_collections=None,
                                            epsilon=self.epsilon,
                                            scale=True,
                                            is_training=train,     # controls use/update of the moving averages
                                            scope=self.name)
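A minimal usage sketch for this wrapper class; the layer output `h` and the surrounding tensors are assumptions made only for illustration:

```python
# Hypothetical usage: one instance per layer, called on the pre-activation output.
bn1 = batch_norm(name='bn1')

h = tf.matmul(x, W) + b        # some layer output (assumed to exist)
h = bn1(h, train=True)         # pass train=False at inference time
h = tf.nn.relu(h)
```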
1. Note: when training, the moving_mean and moving_variance need to be updated. By default the update ops are placed in `tf.GraphKeys.UPDATE_OPS`, so they need to be added as a dependency to the `train_op`. For example:
```python
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)
```
One can set updates_collections=None to force the updates in place, but that
can have a speed penalty, especially in distributed settings.
2. is_training: Whether or not the layer is in training mode. In training mode
it would accumulate the statistics of the moments into `moving_mean` and
`moving_variance` using an exponential moving average with the given
`decay`. When it is not in training mode then it would use the values of
the `moving_mean` and the `moving_variance`.
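Putting the two notes together, here is a minimal sketch (not from the original) of calling tf.contrib.layers.batch_norm with the default updates_collections, an is_training placeholder, and the UPDATE_OPS dependency; the input, loss, and optimizer are placeholders for illustration only:

```python
import tensorflow as tf

is_training = tf.placeholder(tf.bool, name='is_training')   # feed True for training, False for eval
inputs = tf.placeholder(tf.float32, [None, 32, 32, 16])     # illustrative feature map

net = tf.contrib.layers.batch_norm(inputs, decay=0.9, scale=True,
                                   is_training=is_training, scope='bn')

loss = tf.reduce_mean(tf.square(net))                        # dummy loss, illustration only
optimizer = tf.train.GradientDescentOptimizer(0.01)

# With the default updates_collections, the moving_mean / moving_variance
# update ops sit in tf.GraphKeys.UPDATE_OPS and must run with the train step:
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss)
```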
tf.nn.batch_normalization: wrapping the low-level interface yourself
In fact, tf.contrib.layers.batch_norm is itself a wrapper around tf.nn.moments and tf.nn.batch_normalization, and the class above wraps it one more time (mainly to pin down some default parameters). In practice you can simply call tf.contrib.layers.batch_norm directly; it is already convenient enough.
The following batch_norm function adds the moving-average handling itself, i.e. it skips the wrappers and implements batch normalization directly with tf.nn.moments and tf.nn.batch_normalization:
def batch_norm(x, beta, gamma, phase_train, scope='bn', decay=0.9, eps=1e-5):
    with tf.variable_scope(scope):
        # beta = tf.get_variable(name='beta', shape=[n_out],
        #                        initializer=tf.constant_initializer(0.0), trainable=True)
        # gamma = tf.get_variable(name='gamma', shape=[n_out],
        #                         initializer=tf.random_normal_initializer(1.0, stddev), trainable=True)
        batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2], name='moments')
        ema = tf.train.ExponentialMovingAverage(decay=decay)

        def mean_var_with_update():
            ema_apply_op = ema.apply([batch_mean, batch_var])
            with tf.control_dependencies([ema_apply_op]):
                # tf.identity wraps the values as Tensors that become part of the graph;
                # otherwise, since Variables live independently of the Session,
                # they would not be constrained by control_dependencies
                return tf.identity(batch_mean), tf.identity(batch_var)

        mean, var = tf.cond(phase_train,
                            mean_var_with_update,
                            lambda: (ema.average(batch_mean), ema.average(batch_var)))
        normed = tf.nn.batch_normalization(x, mean, var, beta, gamma, eps)
    return normed
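A hypothetical call site for the function above, with beta/gamma created by the caller and phase_train fed as a boolean placeholder; the channel count and shapes are assumptions:

```python
n_out = 64
conv_out = tf.placeholder(tf.float32, [None, 28, 28, n_out])   # illustrative conv feature map
phase_train = tf.placeholder(tf.bool, name='phase_train')

beta = tf.Variable(tf.zeros([n_out]), name='beta')
gamma = tf.Variable(tf.ones([n_out]), name='gamma')

normed = batch_norm(conv_out, beta, gamma, phase_train)
# run with feed_dict={..., phase_train: True} during training (updates the EMA)
# and phase_train: False at inference (uses the EMA values)
```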
Another version, with the moving-average update written out explicitly:
def batch_norm(x, size, training, decay=0.999):
    beta = tf.Variable(tf.zeros([size]), name='beta')
    scale = tf.Variable(tf.ones([size]), name='scale')
    pop_mean = tf.Variable(tf.zeros([size]))
    pop_var = tf.Variable(tf.ones([size]))
    epsilon = 1e-3

    batch_mean, batch_var = tf.nn.moments(x, [0, 1, 2])
    train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean * (1 - decay))
    train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))

    def batch_statistics():
        with tf.control_dependencies([train_mean, train_var]):
            return tf.nn.batch_normalization(x, batch_mean, batch_var, beta, scale, epsilon, name='batch_norm')

    def population_statistics():
        return tf.nn.batch_normalization(x, pop_mean, pop_var, beta, scale, epsilon, name='batch_norm')

    return tf.cond(training, batch_statistics, population_statistics)
Note: tf.cond is a control-flow op. If its first argument evaluates to True, it runs the function passed as the second argument; otherwise it runs the function passed as the third.
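A hypothetical call site for this second version; because `training` is a boolean tensor, tf.cond selects batch_statistics when it evaluates to True and population_statistics otherwise (shapes and names below are assumptions):

```python
x = tf.placeholder(tf.float32, [None, 28, 28, 64])   # illustrative feature map
training = tf.placeholder(tf.bool, name='training')

y = batch_norm(x, size=64, training=training)
# feed {training: True} during training (uses batch statistics and updates
# pop_mean / pop_var); feed {training: False} at inference (uses the stored
# population statistics)
```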
