"""Load the CIFAR-10 image set and prepare grayscale versions of the images."""
# np and plt are used throughout this section but were never imported.
import matplotlib.pyplot as plt
import numpy as np
from keras.datasets import cifar10


def rgb2gray(rgb):
    """Convert colour image data to grayscale.

    A pixel [r, g, b] maps to the luminance value
    0.299 * r + 0.587 * g + 0.114 * b.

    Args:
        rgb: Array whose last axis holds at least the three R, G, B
            values; any leading axes (batch, rows, columns) are kept.

    Returns:
        Array with the last axis removed, containing the weighted sum of
        the first three channels.
    """
    return np.dot(rgb[..., :3], [0.299, 0.587, 0.114])


# The class labels are not needed for colourisation, so they are discarded.
(x_train, _), (x_test, _) = cifar10.load_data()

img_rows = x_train.shape[1]
img_cols = x_train.shape[2]
channels = x_train.shape[3]

# Show the first 100 colour test images together as a 10 x 10 mosaic:
# hstack joins the 10 images of a row, vstack stacks the 10 rows.
imgs = x_test[:100]
imgs = imgs.reshape((10, 10, img_rows, img_cols, channels))
imgs = np.vstack([np.hstack(i) for i in imgs])
plt.figure()
plt.axis('off')
plt.title('Original color images')
plt.imshow(imgs, interpolation='none')
plt.show()

# Convert both splits to grayscale and show the same 100 test images.
x_train_gray = rgb2gray(x_train)
x_test_gray = rgb2gray(x_test)
imgs = x_test_gray[:100]
imgs = imgs.reshape((10, 10, img_rows, img_cols))
imgs = np.vstack([np.hstack(i) for i in imgs])
plt.figure()
plt.axis('off')
plt.title('gray images')
plt.imshow(imgs, interpolation='none', cmap='gray')
plt.show()

# Normalise colour and grayscale images by dividing the pixel values by 255.
x_train = x_train.astype('float32') / 255
x_test = x_test.astype('float32') / 255
x_train_gray = x_train_gray.astype('float32') / 255
x_test_gray = x_test_gray.astype('float32') / 255

# Put every set into the 4-D layout (num, rows, cols, channels) expected by
# the convolutional layers.  The images are NOT flattened: for the colour
# sets this reshape leaves the shape unchanged, while the grayscale sets
# gain an explicit trailing channel axis of size 1.
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, channels)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, channels)
x_train_gray = x_train_gray.reshape(
    x_train_gray.shape[0], img_rows, img_cols, 1)
x_test_gray = x_test_gray.reshape(
    x_test_gray.shape[0], img_rows, img_cols, 1)
# The layer classes, Model and the backend alias K are used below but were
# never imported in this file.
from keras import backend as K
from keras.layers import Conv2D, Dense, Flatten, Input
from keras.models import Model

# The encoder consumes single-channel (grayscale) images.
input_shape = (img_rows, img_cols, 1)
batch_size = 32
kernel_size = 3
# The code vector has to preserve both the objects in the picture and the
# information needed to recover their colours, so it is made fairly large.
latent_dim = 256
layer_filters = [64, 128, 256]

inputs = Input(shape=input_shape, name='encoder_input')
x = inputs
# Each stride-2 convolution with 'same' padding halves the spatial size.
for filters in layer_filters:
    x = Conv2D(filters=filters,
               kernel_size=kernel_size,
               strides=2,
               activation='relu',
               padding='same')(x)

# Remember the shape of the last convolutional feature map so the decoder
# can rebuild it.  For a (32, 32, 1) input the three convolutions above give
# (4, 4, 256); K.int_shape also reports the batch axis at index 0.
shape = K.int_shape(x)

x = Flatten()(x)
latent = Dense(latent_dim, name='latent_vector')(x)

encoder = Model(inputs, latent, name='encoder')
encoder.summary()
# The layer classes, Model and the backend alias K are used below but were
# never imported in this file.
from keras import backend as K
from keras.layers import Conv2DTranspose, Dense, Input, Reshape
from keras.models import Model

latent_inputs = Input(shape=(latent_dim,), name='decoder_input')

# Expand the latent vector with a dense layer and reshape it back to the
# encoder's last feature map (the `shape` recorded by the encoder section,
# skipping its batch axis at index 0).
x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs)
x = Reshape((shape[1], shape[2], shape[3]))(x)

# The decoder mirrors the encoder: the filter counts are walked in reverse
# order and every stride-2 transposed convolution doubles the spatial size,
# so three layers undo the three halvings done by the encoder.
for filters in layer_filters[::-1]:
    x = Conv2DTranspose(filters=filters,
                        kernel_size=kernel_size,
                        strides=2,
                        activation='relu',
                        padding='same')(x)

# Final layer maps back to `channels` colour planes.  The training targets
# are scaled to [0, 1], so a sigmoid is used here: it keeps every predicted
# pixel inside that range, whereas the previous ReLU was unbounded above and
# could get stuck emitting zeros.
outputs = Conv2DTranspose(filters=channels,
                          kernel_size=kernel_size,
                          activation='sigmoid',
                          padding='same',
                          name='decoder_output')(x)
print(K.int_shape(outputs))

decoder = Model(latent_inputs, outputs, name='decoder')
decoder.summary()
import os

# np and Model are used below but were never imported in this file.
import numpy as np
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
from keras.models import Model

# Chain encoder and decoder into one trainable model: gray image in,
# colour image out.
autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder')
autoencoder.summary()

# If the monitored metric (Keras' default for this callback) has not
# improved for 5 epochs, multiply the learning rate by sqrt(0.1), never
# going below min_lr.  Lowering the rate helps training progress again.
lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               verbose=1,
                               min_lr=0.5e-6)

# Checkpoints go to ./save_models; the {epoch:03d} placeholder in the file
# name is filled in by ModelCheckpoint.
save_dir = os.path.join(os.getcwd(), 'save_models')
model_name = 'colorized_ae+model.{epoch:03d}.h5'
# exist_ok=True creates the directory only when it is missing and does not
# fail if it appears between a check and the creation.
os.makedirs(save_dir, exist_ok=True)
filepath = os.path.join(save_dir, model_name)

checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_loss',
                             verbose=1)

autoencoder.compile(loss='mse', optimizer='adam')

callbacks = [lr_reducer, checkpoint]
# Inputs are the grayscale images, targets are the matching colour images.
autoencoder.fit(x_train_gray,
                x_train,
                validation_data=(x_test_gray, x_test),
                epochs=30,
                batch_size=batch_size,
                callbacks=callbacks)
# np and plt are used below but were never imported in this file.
import matplotlib.pyplot as plt
import numpy as np

# Colourise the grayscale test images and show the first 100 results as a
# 10 x 10 mosaic (hstack joins the images of a row, vstack stacks the rows).
x_decoded = autoencoder.predict(x_test_gray)

imgs = x_decoded[:100]
imgs = imgs.reshape((10, 10, img_rows, img_cols, channels))
imgs = np.vstack([np.hstack(i) for i in imgs])
plt.figure()
plt.axis('off')
plt.title('Colorized test images are: ')
plt.imshow(imgs, interpolation='none')
plt.show()