1. Building a simple layer
from __future__ import absolute_import, division, print_function
import tensorflow as tf
tf.keras.backend.clear_session()
import tensorflow.keras as keras
import tensorflow.keras.layers as layers
# Defining a layer means: setting up the layer's weights and the computation that maps inputs to outputs
class MyLayer(layers.Layer):
    def __init__(self, input_dim=32, unit=32):
        super(MyLayer, self).__init__()
        w_init = tf.random_normal_initializer()
        self.weight = tf.Variable(initial_value=w_init(
            shape=(input_dim, unit), dtype=tf.float32), trainable=True)
        b_init = tf.zeros_initializer()
        self.bias = tf.Variable(initial_value=b_init(
            shape=(unit,), dtype=tf.float32), trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.weight) + self.bias
x = tf.ones((3,5))
my_layer = MyLayer(5, 4)
out = my_layer(x)
print(out)
tf.Tensor(
[[0.06709253 0.06818779 0.09926171 0.0179923 ]
[0.06709253 0.06818779 0.09926171 0.0179923 ]
[0.06709253 0.06818779 0.09926171 0.0179923 ]], shape=(3, 4), dtype=float32)
With the layer built as above, it automatically tracks the weights w and b. Alternatively, the weights can be created directly with the add_weight method:
class MyLayer(layers.Layer):
    def __init__(self, input_dim=32, unit=32):
        super(MyLayer, self).__init__()
        self.weight = self.add_weight(shape=(input_dim, unit),
                                      initializer=keras.initializers.RandomNormal(),
                                      trainable=True)
        self.bias = self.add_weight(shape=(unit,),
                                    initializer=keras.initializers.Zeros(),
                                    trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.weight) + self.bias
x = tf.ones((3,5))
my_layer = MyLayer(5, 4)
out = my_layer(x)
print(out)
tf.Tensor(
[[-0.10401802 -0.05459599 -0.08195674 0.13151655]
[-0.10401802 -0.05459599 -0.08195674 0.13151655]
[-0.10401802 -0.05459599 -0.08195674 0.13151655]], shape=(3, 4), dtype=float32)
You can also define non-trainable weights:
class AddLayer(layers.Layer):
    def __init__(self, input_dim=32):
        super(AddLayer, self).__init__()
        self.sum = self.add_weight(shape=(input_dim,),
                                   initializer=keras.initializers.Zeros(),
                                   trainable=False)

    def call(self, inputs):
        self.sum.assign_add(tf.reduce_sum(inputs, axis=0))
        return self.sum
x = tf.ones((3,3))
my_layer = AddLayer(3)
out = my_layer(x)
print(out.numpy())
out = my_layer(x)
print(out.numpy())
print('weight:', my_layer.weights)
print('non-trainable weight:', my_layer.non_trainable_weights)
print('trainable weight:', my_layer.trainable_weights)
[3. 3. 3.]
[6. 6. 6.]
weight: [<tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([6., 6., 6.], dtype=float32)>]
non-trainable weight: [<tf.Variable 'Variable:0' shape=(3,) dtype=float32, numpy=array([6., 6., 6.], dtype=float32)>]
trainable weight: []
When the input dimensions are not known at the time the layer is defined, you can override the build() method and create the weights from the input shape it receives:
class MyLayer(layers.Layer):
    def __init__(self, unit=32):
        super(MyLayer, self).__init__()
        self.unit = unit

    def build(self, input_shape):
        self.weight = self.add_weight(shape=(input_shape[-1], self.unit),
                                      initializer=keras.initializers.RandomNormal(),
                                      trainable=True)
        self.bias = self.add_weight(shape=(self.unit,),
                                    initializer=keras.initializers.Zeros(),
                                    trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.weight) + self.bias
my_layer = MyLayer(3)
x = tf.ones((3,5))
out = my_layer(x)
print(out)
my_layer = MyLayer(3)
x = tf.ones((2,2))
out = my_layer(x)
print(out)
tf.Tensor(
[[ 0.00949192 -0.02009935 -0.11726624]
[ 0.00949192 -0.02009935 -0.11726624]
[ 0.00949192 -0.02009935 -0.11726624]], shape=(3, 3), dtype=float32)
tf.Tensor(
[[-0.00516411 -0.04891593 -0.0181773 ]
[-0.00516411 -0.04891593 -0.0181773 ]], shape=(2, 3), dtype=float32)
2. Building layers recursively from sublayers
class MyBlock(layers.Layer):
    def __init__(self):
        super(MyBlock, self).__init__()
        self.layer1 = MyLayer(32)
        self.layer2 = MyLayer(16)
        self.layer3 = MyLayer(2)

    def call(self, inputs):
        h1 = self.layer1(inputs)
        h1 = tf.nn.relu(h1)
        h2 = self.layer2(h1)
        h2 = tf.nn.relu(h2)
        return self.layer3(h2)
my_block = MyBlock()
print('trainable weights:', len(my_block.trainable_weights))
y = my_block(tf.ones(shape=(3, 64)))
# the weights are created in build(), so they only exist once the layer has been called
print('trainable weights:', len(my_block.trainable_weights))
trainable weights: 0
trainable weights: 6
Losses can also be collected while building layers, using the add_loss() method:
class LossLayer(layers.Layer):
    def __init__(self, rate=1e-2):
        super(LossLayer, self).__init__()
        self.rate = rate

    def call(self, inputs):
        self.add_loss(self.rate * tf.reduce_sum(inputs))
        return inputs

class OutLayer(layers.Layer):
    def __init__(self):
        super(OutLayer, self).__init__()
        self.loss_fun = LossLayer(1e-2)

    def call(self, inputs):
        return self.loss_fun(inputs)
my_layer = OutLayer()
print(len(my_layer.losses))  # call() has not run yet
y = my_layer(tf.zeros((1, 1)))
print(len(my_layer.losses))  # after call() has run
y = my_layer(tf.zeros((1, 1)))
print(len(my_layer.losses))  # losses is reset at the start of every call, so it stays at 1
0
1
1
If a built-in Keras layer is called inside, its regularization losses are also collected:
class OuterLayer(layers.Layer):
    def __init__(self):
        super(OuterLayer, self).__init__()
        self.dense = layers.Dense(32, kernel_regularizer=tf.keras.regularizers.l2(1e-3))

    def call(self, inputs):
        return self.dense(inputs)
my_layer = OuterLayer()
y = my_layer(tf.zeros((1,1)))
print(my_layer.losses)
print(my_layer.weights)
[<tf.Tensor: id=413, shape=(), dtype=float32, numpy=0.0018067828>]
[<tf.Variable 'outer_layer_1/dense_1/kernel:0' shape=(1, 32) dtype=float32, numpy=
array([[-0.11054656, 0.34735924, -0.22560999, 0.38415992, 0.13070339,
0.15960163, 0.20130599, 0.40365922, -0.09471637, -0.02402192,
0.16438413, 0.2716753 , 0.0594548 , -0.06913272, -0.40491152,
0.00894281, 0.3199494 , 0.0228827 , -0.18515846, 0.32210535,
0.41672045, 0.1942389 , -0.4254937 , 0.07178113, 0.00740242,
0.23780417, -0.24449413, -0.15526545, -0.2200018 , -0.2426699 ,
-0.17750363, -0.16994882]], dtype=float32)>, <tf.Variable 'outer_layer_1/dense_1/bias:0' shape=(32,) dtype=float32, numpy=
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
dtype=float32)>]
3. Other layer configuration
Making your own layer serializable
class Linear(layers.Layer):
    def __init__(self, units=32, **kwargs):
        super(Linear, self).__init__(**kwargs)
        self.units = units

    def build(self, input_shape):
        self.w = self.add_weight(shape=(input_shape[-1], self.units),
                                 initializer='random_normal',
                                 trainable=True)
        self.b = self.add_weight(shape=(self.units,),
                                 initializer='random_normal',
                                 trainable=True)

    def call(self, inputs):
        return tf.matmul(inputs, self.w) + self.b

    def get_config(self):
        config = super(Linear, self).get_config()
        config.update({'units': self.units})
        return config
layer = Linear(125)
config = layer.get_config()
print(config)
new_layer = Linear.from_config(config)
{'name': 'linear_1', 'trainable': True, 'dtype': None, 'units': 125}
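With get_config defined, a whole model containing this layer can be serialized and restored by name. A minimal sketch, assuming a wrapping Sequential model (the model and input shape below are illustrative and not part of the original example):
model = tf.keras.Sequential([tf.keras.Input(shape=(8,)), Linear(4)])
json_config = model.to_json()
# custom_objects lets Keras map the name 'Linear' back to our class when deserializing.
restored = tf.keras.models.model_from_json(json_config,
                                           custom_objects={'Linear': Linear})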
Configuring a layer that is only active during training
class MyDropout(layers.Layer):
    def __init__(self, rate, **kwargs):
        super(MyDropout, self).__init__(**kwargs)
        self.rate = rate

    def call(self, inputs, training=None):
        # tf.cond expects a boolean tensor; treat a missing training flag as inference.
        training = tf.cast(training if training is not None else False, tf.bool)
        return tf.cond(training,
                       lambda: tf.nn.dropout(inputs, rate=self.rate),
                       lambda: inputs)
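A quick usage sketch (the tensor below is only illustrative): dropout is applied when training=True, and the inputs pass through unchanged otherwise.
drop = MyDropout(0.5)
x = tf.ones((2, 4))
print(drop(x, training=True))   # roughly half the entries zeroed, the rest scaled by 1/(1-rate)
print(drop(x, training=False))  # returned unchanged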
4. Building your own model
Typically, we use the Layer class to define the inner computation blocks and the Model class to define the outer model, that is, the object we actually train.
Differences between the Model class and Layer:
- It exposes the built-in training, evaluation and prediction loops (model.fit(), model.evaluate(), model.predict()).
- It exposes its list of inner layers through the model.layers property.
- It exposes the saving and serialization APIs.
Below, we build a variational autoencoder (VAE) to show how to construct your own model.
# Sampling layer: draws z from (z_mean, z_log_var) via the reparameterization trick
class Sampling(layers.Layer):
    def call(self, inputs):
        z_mean, z_log_var = inputs
        batch = tf.shape(z_mean)[0]
        dim = tf.shape(z_mean)[1]
        epsilon = tf.keras.backend.random_normal(shape=(batch, dim))
        return z_mean + tf.exp(0.5 * z_log_var) * epsilon
# Encoder
class Encoder(layers.Layer):
    def __init__(self, latent_dim=32,
                 intermediate_dim=64, name='encoder', **kwargs):
        super(Encoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
        self.dense_mean = layers.Dense(latent_dim)
        self.dense_log_var = layers.Dense(latent_dim)
        self.sampling = Sampling()

    def call(self, inputs):
        h1 = self.dense_proj(inputs)
        z_mean = self.dense_mean(h1)
        z_log_var = self.dense_log_var(h1)
        z = self.sampling((z_mean, z_log_var))
        return z_mean, z_log_var, z
# Decoder
class Decoder(layers.Layer):
    def __init__(self, original_dim,
                 intermediate_dim=64, name='decoder', **kwargs):
        super(Decoder, self).__init__(name=name, **kwargs)
        self.dense_proj = layers.Dense(intermediate_dim, activation='relu')
        self.dense_output = layers.Dense(original_dim, activation='sigmoid')

    def call(self, inputs):
        h1 = self.dense_proj(inputs)
        return self.dense_output(h1)
# Variational autoencoder
class VAE(tf.keras.Model):
    def __init__(self, original_dim, latent_dim=32,
                 intermediate_dim=64, name='vae', **kwargs):
        super(VAE, self).__init__(name=name, **kwargs)
        self.original_dim = original_dim
        self.encoder = Encoder(latent_dim=latent_dim,
                               intermediate_dim=intermediate_dim)
        self.decoder = Decoder(original_dim=original_dim,
                               intermediate_dim=intermediate_dim)

    def call(self, inputs):
        z_mean, z_log_var, z = self.encoder(inputs)
        reconstructed = self.decoder(z)
        # Add the KL divergence regularization term as a model loss.
        kl_loss = -0.5 * tf.reduce_sum(
            z_log_var - tf.square(z_mean) - tf.exp(z_log_var) + 1)
        self.add_loss(kl_loss)
        return reconstructed
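For reference, kl_loss above is the closed-form KL divergence between the approximate posterior $\mathcal{N}(\mu, \sigma^2)$ (with $\mu$ = z_mean and $\log\sigma^2$ = z_log_var) and the standard normal prior, summed over the latent dimensions:
$$\mathrm{KL}\big(\mathcal{N}(\mu, \sigma^2)\,\|\,\mathcal{N}(0, 1)\big) = -\tfrac{1}{2}\sum_j \big(1 + \log\sigma_j^2 - \mu_j^2 - \sigma_j^2\big)$$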
(x_train, _), _ = tf.keras.datasets.mnist.load_data()
x_train = x_train.reshape(60000, 784).astype('float32') / 255
vae = VAE(784,32,64)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
vae.compile(optimizer, loss=tf.keras.losses.MeanSquaredError())
vae.fit(x_train, x_train, epochs=3, batch_size=64)
Epoch 1/3
60000/60000 [==============================] - 3s 44us/sample - loss: 0.7352
Epoch 2/3
60000/60000 [==============================] - 2s 33us/sample - loss: 0.0691
Epoch 3/3
60000/60000 [==============================] - 2s 33us/sample - loss: 0.0679
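Since VAE subclasses tf.keras.Model, the saving APIs mentioned earlier are also available. A minimal sketch, using an illustrative checkpoint prefix of our own choosing ('vae_ckpt' is not part of the original example):
vae.save_weights('vae_ckpt')
# ...later: recreate the model, build it with a dummy batch, then restore the trained weights
new_vae = VAE(784, 32, 64)
new_vae(tf.zeros((1, 784)))
new_vae.load_weights('vae_ckpt')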
Writing your own training loop
train_dataset = tf.data.Dataset.from_tensor_slices(x_train)
train_dataset = train_dataset.shuffle(buffer_size=1024).batch(64)
original_dim = 784
vae = VAE(original_dim, latent_dim=32, intermediate_dim=64)
optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
mse_loss_fn = tf.keras.losses.MeanSquaredError()
loss_metric = tf.keras.metrics.Mean()
# Iterate over epochs.
for epoch in range(3):
    print('Start of epoch %d' % (epoch,))
    # Iterate over the batches of the dataset and train on each one.
    for step, x_batch_train in enumerate(train_dataset):
        with tf.GradientTape() as tape:
            reconstructed = vae(x_batch_train)
            # Compute the reconstruction loss
            loss = mse_loss_fn(x_batch_train, reconstructed)
            loss += sum(vae.losses)  # add the KL divergence regularization loss
        grads = tape.gradient(loss, vae.trainable_variables)
        optimizer.apply_gradients(zip(grads, vae.trainable_variables))
        loss_metric(loss)
        if step % 100 == 0:
            print('step %s: mean loss = %s' % (step, loss_metric.result()))
Start of epoch 0
step 0: mean loss = tf.Tensor(213.26726, shape=(), dtype=float32)
step 100: mean loss = tf.Tensor(6.5270114, shape=(), dtype=float32)
...
step 900: mean loss = tf.Tensor(0.3061987, shape=(), dtype=float32)
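As an optional variation that is not part of the original example, the per-batch computation can be wrapped in a tf.function so TensorFlow compiles it into a graph, which usually speeds up the loop. A minimal sketch reusing the objects defined above:
@tf.function
def train_step(x):
    # One optimization step: forward pass, reconstruction + KL loss, gradient update.
    with tf.GradientTape() as tape:
        reconstructed = vae(x)
        loss = mse_loss_fn(x, reconstructed) + sum(vae.losses)
    grads = tape.gradient(loss, vae.trainable_variables)
    optimizer.apply_gradients(zip(grads, vae.trainable_variables))
    return loss

for epoch in range(3):
    for step, x_batch_train in enumerate(train_dataset):
        loss_metric(train_step(x_batch_train))
    print('epoch %d: mean loss = %s' % (epoch, loss_metric.result()))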