GAN Network Architecture Analysis

The figure above shows the logical architecture of a GAN: the noise vector is the feature vector z, and the real images are the input variable x. The labels are straightforward (it is just binary classification): real images are labeled with tf.ones and fake ones with tf.zeros.

The concrete network shapes are roughly as shown above, with some of the actual values adjusted. The generator pipeline is: noise vector - fully connected - convolution - convolution - convolution. The discriminator pipeline is: image - convolution - convolution - fully connected - fully connected.
Contrary to what one might expect, the data in the generator does not grow from nothing, from small to large; instead it starts at 3136 dimensions (56*56) and is reduced to 28*28 through ordinary convolution steps.
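To make this concrete, the tensor shapes evolve through the generator defined below as follows (z_dim = 100, as used in the training script; b is the batch size):

# z:  (b, 100)           noise vector
# g1: (b, 56, 56, 1)     fully connected up to 3136 = 56*56, then reshaped
# g2: (b, 28, 28, 50)    3x3 conv, stride 2; then resized back up to (b, 56, 56, 50)
# g3: (b, 28, 28, 25)    3x3 conv, stride 2; then resized back up to (b, 56, 56, 25)
# g4: (b, 28, 28, 1)     1x1 conv, stride 2, plus sigmoid: the final image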
The implementation is as follows:
import datetime
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('../../Mnist_data')
"""測試數據"""
# sample_image = mnist.train.next_batch(1)[0]
# print(sample_image.shape)
# sample_image = sample_image.reshape([28, 28])
# plt.imshow(sample_image, cmap='Greys')
"""分辨器"""
def discriminator(images, reuse=None):
with tf.variable_scope(tf.get_variable_scope(), reuse=reuse) as scope:
# 卷積 + 激活 + 池化
d_w1 = tf.get_variable('d_w1',[5,5,1,32],initializer=tf.truncated_normal_initializer(stddev=0.02))
d_b1 = tf.get_variable('d_b1',[32],initializer=tf.constant_initializer(0))
d1 = tf.nn.conv2d(input=images,filter=d_w1,strides=[1,1,1,1],padding='SAME')
d1 = d1 + d_b1
d1 = tf.nn.relu(d1)
d1 = tf.nn.avg_pool(d1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
# 卷積 + 激活 + 池化
d_w2 = tf.get_variable('d_w2',[5,5,32,64],initializer=tf.truncated_normal_initializer(stddev=0.02))
d_b2 = tf.get_variable('d_b2',[64],initializer=tf.constant_initializer(0))
d2 = tf.nn.conv2d(input=d1,filter=d_w2,strides=[1,1,1,1],padding='SAME')
d2 = d2 + d_b2
d2 = tf.nn.relu(d2)
d2 = tf.nn.avg_pool(d2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
# 全連接 + 激活
d_w3 = tf.get_variable('d_w3',[7 * 7 * 64,1024],initializer=tf.truncated_normal_initializer(stddev=0.02))
d_b3 = tf.get_variable('d_b3',[1024],initializer=tf.constant_initializer(0))
d3 = tf.reshape(d2,[-1,7 * 7 * 64])
d3 = tf.matmul(d3,d_w3)
d3 = d3 + d_b3
d3 = tf.nn.relu(d3)
# 全連接
d_w4 = tf.get_variable('d_w4',[1024,1],initializer=tf.truncated_normal_initializer(stddev=0.02))
d_b4 = tf.get_variable('d_b4',[1],initializer=tf.constant_initializer(0))
d4 = tf.matmul(d3,d_w4) + d_b4
# 最后輸出一個非尺度化的值
return d4
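As a quick sanity check (a sketch, not part of the original script; it builds a throwaway graph and session), the discriminator should map any batch of 28x28x1 images to one unscaled logit each:

tf.reset_default_graph()
test_images = tf.placeholder(tf.float32, [None, 28, 28, 1])
test_logits = discriminator(test_images)
with tf.Session() as test_sess:
    test_sess.run(tf.global_variables_initializer())
    out = test_sess.run(test_logits, {test_images: np.random.rand(4, 28, 28, 1)})
    print(out.shape)  # (4, 1): one logit per image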
"""生成器"""
def generator(z, batch_size, z_dim, reuse=False):
'''接收特征向量z,由z生成圖片'''
with tf.variable_scope(tf.get_variable_scope(),reuse=reuse):
# 全連接 + 批正則化 + 激活
# z_dim -> 3136 -> 56*56*1
g_w1 = tf.get_variable('g_w1', [z_dim, 3136], dtype=tf.float32,
initializer=tf.truncated_normal_initializer(stddev=0.02))
g_b1 = tf.get_variable('g_b1', [3136], initializer=tf.truncated_normal_initializer(stddev=0.02))
g1 = tf.matmul(z, g_w1) + g_b1
g1 = tf.reshape(g1, [-1, 56, 56, 1])
g1 = tf.contrib.layers.batch_norm(g1, epsilon=1e-5, scope='bn1')
g1 = tf.nn.relu(g1)
# 卷積 + 批正則化 + 激活
g_w2 = tf.get_variable('g_w2',[3,3,1,z_dim / 2],dtype=tf.float32,
initializer=tf.truncated_normal_initializer(stddev=0.02))
g_b2 = tf.get_variable('g_b2',[z_dim / 2],initializer=tf.truncated_normal_initializer(stddev=0.02))
g2 = tf.nn.conv2d(g1,g_w2,strides=[1,2,2,1],padding='SAME')
g2 = g2 + g_b2
g2 = tf.contrib.layers.batch_norm(g2,epsilon=1e-5,scope='bn2')
g2 = tf.nn.relu(g2)
g2 = tf.image.resize_images(g2,[56,56])
# 卷積 + 批正則化 + 激活
g_w3 = tf.get_variable('g_w3',[3,3,z_dim / 2,z_dim / 4],dtype=tf.float32,
initializer=tf.truncated_normal_initializer(stddev=0.02))
g_b3 = tf.get_variable('g_b3',[z_dim / 4],initializer=tf.truncated_normal_initializer(stddev=0.02))
g3 = tf.nn.conv2d(g2,g_w3,strides=[1,2,2,1],padding='SAME')
g3 = g3 + g_b3
g3 = tf.contrib.layers.batch_norm(g3,epsilon=1e-5,scope='bn3')
g3 = tf.nn.relu(g3)
g3 = tf.image.resize_images(g3,[56,56])
# 卷積 + 激活
g_w4 = tf.get_variable('g_w4',[1,1,z_dim / 4,1],dtype=tf.float32,
initializer=tf.truncated_normal_initializer(stddev=0.02))
g_b4 = tf.get_variable('g_b4',[1],initializer=tf.truncated_normal_initializer(stddev=0.02))
g4 = tf.nn.conv2d(g3,g_w4,strides=[1,2,2,1],padding='SAME')
g4 = g4 + g_b4
g4 = tf.sigmoid(g4)
# 輸出g4的維度: batch_size x 28 x 28 x 1
return g4
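Similarly, a throwaway check (a sketch, assuming z_dim = 100 as in the training script) that the generator emits 28x28x1 images squashed into (0, 1) by the final sigmoid:

tf.reset_default_graph()
test_z = tf.placeholder(tf.float32, [None, 100])
test_fake = generator(test_z, 4, 100)
with tf.Session() as test_sess:
    test_sess.run(tf.global_variables_initializer())
    fake = test_sess.run(test_fake, {test_z: np.random.normal(size=[4, 100])})
    print(fake.shape, fake.min(), fake.max())  # (4, 28, 28, 1), all values in (0, 1)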
The wiring of the whole graph is implemented as follows; note that the loss values of the different components are computed separately:
"""邏輯架構""" tf.reset_default_graph() batch_size = 50 z_dimensions = 100 z_placeholder = tf.placeholder(tf.float32, [None, z_dimensions], name='z_placeholder') x_placeholder = tf.placeholder(tf.float32, shape = [None,28,28,1], name='x_placeholder') Gz = generator(z_placeholder, batch_size, z_dimensions) # 根據z生成偽造圖片 Dx = discriminator(x_placeholder) # 辨別器辨別真實圖片 Dg = discriminator(Gz, reuse=True) # 辨別器辨別偽造圖片 #discriminator 的loss 分為兩部分 d_loss_real = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = Dx, labels = tf.ones_like(Dx))) d_loss_fake = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = Dg, labels = tf.zeros_like(Dg))) d_loss=d_loss_real + d_loss_fake # Generator的目標是生成盡可能真實的圖像,所以計算Dg和1的loss g_loss = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits = Dg, labels = tf.ones_like(Dg)))
The optimizer section has a few points worth noting:
"""優化部分""" # 由於訓練時生成器和辨別器是分開訓練的, # 所以不同的訓練過程對應的優化參數是要做區分的 tvars = tf.trainable_variables() d_vars = [var for var in tvars if 'd_' in var.name] g_vars = [var for var in tvars if 'g_' in var.name] d_trainer_real = tf.train.AdamOptimizer(0.0003).minimize(d_loss_real, var_list=d_vars) d_trainer_fake = tf.train.AdamOptimizer(0.0003).minimize(d_loss_fake, var_list=d_vars) d_trainer = tf.train.AdamOptimizer(0.0003).minimize(d_loss, var_list=d_vars) g_trainer = tf.train.AdamOptimizer(0.0001).minimize(g_loss, var_list=g_vars)
As the comments say, different training phases optimize different sets of parameters and must not be mixed up, which is where this technique for extracting TensorFlow variables comes in. The result is shown below:
import pprint
pp = pprint.PrettyPrinter()
pp.pprint(d_vars)
pp.pprint(g_vars)

[<tf.Variable 'd_w1:0' shape=(5, 5, 1, 32) dtype=float32_ref>,
 <tf.Variable 'd_b1:0' shape=(32,) dtype=float32_ref>,
 <tf.Variable 'd_w2:0' shape=(5, 5, 32, 64) dtype=float32_ref>,
 <tf.Variable 'd_b2:0' shape=(64,) dtype=float32_ref>,
 <tf.Variable 'd_w3:0' shape=(3136, 1024) dtype=float32_ref>,
 <tf.Variable 'd_b3:0' shape=(1024,) dtype=float32_ref>,
 <tf.Variable 'd_w4:0' shape=(1024, 1) dtype=float32_ref>,
 <tf.Variable 'd_b4:0' shape=(1,) dtype=float32_ref>]
[<tf.Variable 'g_w1:0' shape=(100, 3136) dtype=float32_ref>,
 <tf.Variable 'g_b1:0' shape=(3136,) dtype=float32_ref>,
 <tf.Variable 'g_w2:0' shape=(3, 3, 1, 50) dtype=float32_ref>,
 <tf.Variable 'g_b2:0' shape=(50,) dtype=float32_ref>,
 <tf.Variable 'g_w3:0' shape=(3, 3, 50, 25) dtype=float32_ref>,
 <tf.Variable 'g_b3:0' shape=(25,) dtype=float32_ref>,
 <tf.Variable 'g_w4:0' shape=(1, 1, 25, 1) dtype=float32_ref>,
 <tf.Variable 'g_b4:0' shape=(1,) dtype=float32_ref>]
Then comes the training loop:
"""迭代訓練"""
sess = tf.Session()
sess.run(tf.global_variables_initializer())
# 對discriminator的預訓練
for i in range(300):
print('.',end='')
z_batch = np.random.normal(0, 1, size=[batch_size, z_dimensions])
real_image_batch = mnist.train.next_batch(batch_size)[0].reshape([batch_size, 28, 28, 1])
# 用real and fake images分別對discriminator訓練
_, __, dLossReal, dLossFake = sess.run([d_trainer_real, d_trainer_fake, d_loss_real, d_loss_fake],
{x_placeholder: real_image_batch, z_placeholder: z_batch})
if (i % 100 == 0):
print("\rdLossReal:",dLossReal,"dLossFake:",dLossFake)
# 交替訓練 generator和discriminator
for i in range(100000):
print('.',end='')
real_image_batch = mnist.train.next_batch(batch_size)[0].reshape([batch_size, 28, 28, 1])
z_batch = np.random.normal(0, 1, size=[batch_size, z_dimensions])
# 用real and fake images同時對discriminator訓練
_,dLossReal,dLossFake = sess.run([d_trainer,d_loss_real,d_loss_fake],
{x_placeholder: real_image_batch,z_placeholder: z_batch})
# 訓練generator
z_batch = np.random.normal(0,1,size=[batch_size,z_dimensions])
_ = sess.run(g_trainer,feed_dict={z_placeholder: z_batch})
if i % 100 == 0:
# 每 100 iterations, 輸出一個生成的圖像
print("\rIteration:",i,"at",datetime.datetime.now())
z_batch = np.random.normal(0,1,size=[1,z_dimensions])
generated_images = generator(z_placeholder,1,z_dimensions, reuse=True)
images = sess.run(generated_images,{z_placeholder: z_batch})
plt.imshow(images[0].reshape([28,28]),cmap='Greys')
plt.show()
# 輸出discriminator的值
im = images[0].reshape([1,28,28,1])
result = discriminator(x_placeholder, reuse=True)
estimate = sess.run(result,{x_placeholder: im})
print("Estimate:",np.squeeze(estimate))
The discriminator is pre-trained first, and then the discriminator and the generator are trained alternately.
There are actually some generated images I could show here, but my laptop (a Surface 4) is just too underpowered: after about 600 iterations I couldn't bear to keep it running, so I am shelving this for now... I'll rerun it when I get back to the lab. At least the principle has come across.
Shared Variables
This did not fully sink in when I first read the documentation, but now I roughly understand why shared variables exist: they are something to think about when designing the computation graph.
When the same variable serves different data flows (different nodes of the graph feeding data into the same input position of the same node at different times):
- tf.Variable will simply create two separate variable nodes to receive the different data flows
- get_variable, with reuse set to True, will use one and the same variable to serve the different data flows
This is exactly where shared variables apply, and in the program above it shows up in the discriminator's task. If the discriminator receives an image produced by the generator, it tries to tune its network weights to output 0; if it receives an image from the real data, it tries to tune them to output 1. In other words, fake images and real images must pass through the discriminator using the same set of variables, which is why TensorFlow's variable sharing mechanism is needed here. And unlike an ordinary convolutional network, the fake and real inputs do not enter the computation graph at the same node: real images enter at the x placeholder, while fake images join the graph at the generator's output node.
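A minimal sketch of the two behaviors (hypothetical variable names, fresh graph): tf.Variable called twice with the same name silently creates two nodes, while get_variable under reuse=True hands back the variable already stored in the scope:

tf.reset_default_graph()
v1 = tf.Variable(tf.zeros([1]), name='w')
v2 = tf.Variable(tf.zeros([1]), name='w')
print(v1.name, v2.name)  # w:0 w_1:0 -- two distinct variable nodes
with tf.variable_scope('shared'):
    s1 = tf.get_variable('w', [1], initializer=tf.zeros_initializer())
with tf.variable_scope('shared', reuse=True):
    s2 = tf.get_variable('w', [1])
print(s1.name, s2.name, s1 is s2)  # shared/w:0 shared/w:0 True -- one shared variable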
