『cs231n』Understanding Style Transfer Through Code


『cs231n』Visualization Applications of Convolutional Neural Networks

File layout

vgg16.py

import os
import numpy as np
import tensorflow as tf
from download import exist_or_download

model_url = 'https://s3.amazonaws.com/cadl/models/vgg16.tfmodel'
model_dir = 'vgg16/'
model_name = 'vgg16.tfmodel'

def model_download():
    print('Downloading Model ... ')
    exist_or_download(url=model_url, dir=model_dir)

class VGG16:
    input_tensor_name = 'images:0'
    layer_operation_names = ['conv1_1/conv1_1','conv1_2/conv1_2',
                            'conv2_1/conv2_1','conv2_2/conv2_2',
                            'conv3_1/conv3_1','conv3_2/conv3_2','conv3_3/conv3_3',
                            'conv4_1/conv4_1','conv4_2/conv4_2','conv4_3/conv4_3',
                            'conv5_1/conv5_1','conv5_2/conv5_2','conv5_3/conv5_3']
    dropout_tensor_names = ['dropout/random_uniform:0',
                            'dropout_1/random_uniform:0']

    def __init__(self):
        # Load the tfmodel file into a new graph
        self.graph = tf.Graph()
        with self.graph.as_default():
            with tf.gfile.FastGFile(os.path.join(model_dir,model_name),'rb') as f:
                graph_def = tf.GraphDef()
                graph_def.ParseFromString(f.read())
                tf.import_graph_def(graph_def,name='')
            # Fetch tensors from the graph by name
            self.input_tensor = self.graph.get_tensor_by_name(self.input_tensor_name)
            self.layer_tensors = [self.graph.get_tensor_by_name(name + ':0') for name in self.layer_operation_names]

    def get_layer_tensors(self, layer_ids):
        '''Return the tensors for the given layer indices'''
        return [self.layer_tensors[idx] for idx in layer_ids]

    def get_layer_names(self, layer_ids):
        '''Return the op names for the given layer indices'''
        return [self.layer_operation_names[idx] for idx in layer_ids]

    def get_all_layer_names(self, startswith=None):
        '''Return the names of all ops, optionally only those whose name starts with startswith'''
        names = [op.name for op in self.graph.get_operations()]
        if startswith is not None:
            names = [name for name in names if name.startswith(startswith)]
        return names

    def create_feed_dict(self,input_image):
        '''Add a batch dimension to the input image and return it wrapped in a feed dict'''
        input_image = np.expand_dims(input_image, axis=0)
        feed_dict = {self.input_tensor_name: input_image}
        return feed_dict

Loading flow for the binary model file:

# Create a new, empty graph
self.graph = tf.Graph()
# Make the empty graph the default graph
with self.graph.as_default():
    # Read the model file in binary mode
    with tf.gfile.FastGFile(os.path.join(model_dir,model_name),'rb') as f:
        # Create a GraphDef object to temporarily hold the graph stored in the model file
        graph_def = tf.GraphDef()
        # Parse the serialized graph into the GraphDef
        graph_def.ParseFromString(f.read())
        # Import the GraphDef's graph into the (formerly empty) graph
        tf.import_graph_def(graph_def,name='')
        # To fetch a tensor from the graph, call graph.get_tensor_by_name with the tensor name
        # The tensors obtained this way can be evaluated directly with session.run
        # Background: 'conv1' is a node (op) name, while 'conv1:0' is a tensor name, i.e. the node's first output tensor
        self.input_tensor = self.graph.get_tensor_by_name(self.input_tensor_name)
        self.layer_tensors = [self.graph.get_tensor_by_name(name + ':0') for name in self.layer_operation_names]
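A short usage sketch (not part of the original post; it assumes the model file has already been downloaded with vgg16.model_download()) showing the node-name vs. tensor-name convention and how a single layer can be evaluated:

import numpy as np
import tensorflow as tf
import vgg16

model = vgg16.VGG16()
session = tf.Session(graph=model.graph)

# 'conv1_1/conv1_1' is an op (node) name; appending ':0' names its first output tensor
conv1_1 = model.graph.get_tensor_by_name('conv1_1/conv1_1:0')

# Feed a dummy RGB image; create_feed_dict adds the batch dimension
dummy_image = np.random.rand(224, 224, 3) * 255.0
feed_dict = model.create_feed_dict(dummy_image)
print(session.run(conv1_1, feed_dict=feed_dict).shape)
session.close()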

『TensorFlow』Transfer Learning: Stones from Other Hills Can Polish Jade

『TensorFlow』A Round-up of Model Loading Methods

download.py

import os
import sys
import tarfile
import zipfile
import urllib.request

def _print_download_progress(count, block_size, total_size):
    '''Print download progress'''
    # Fraction of the file downloaded so far
    pct_complete = float(count * block_size / total_size)
    # Format the output: '\r' returns the cursor to the start of the line, and {0:.1%} prints a percentage with one decimal place
    msg = '\r- Download progress: {0:.1%}'.format(pct_complete)
    # Write to stdout; much like print, but without appending a newline
    sys.stdout.write(msg)
    sys.stdout.flush()

def exist_or_download(url,dir):
    # The last segment of the URL is the file name; extract it
    file_name = url.split('/')[-1]
    # Local path (directory + file name) to save to
    file_path = os.path.join(dir, file_name)
    if not os.path.exists(file_path):
        if not os.path.exists(dir):
            os.makedirs(dir)

        # Download the file
        file_path, _ = urllib.request.urlretrieve(url=url,
                                                  filename=file_path,
                                                  reporthook=_print_download_progress)
        # The filename parameter gives the local save path (if omitted, urllib saves the data to a temporary file)
        # The reporthook parameter is a callback triggered when the connection is made and after each data block is transferred; it can be used to display download progress
        # The data parameter is the data POSTed to the server
        # The method returns a (filename, headers) tuple: filename is the local save path, headers is the server's response headers

        print('\r- Download finished.')
        # Automatically extract the archive
        if file_path.endswith('.zip'):
            zipfile.ZipFile(file=file_path, mode='r').extractall(dir)
        elif file_path.endswith(('.tar.gz', '.tgz')):
            tarfile.open(name=file_path, mode='r:gz').extractall(dir)

        print('Done')
    else:
        print('Data has apparently already been downloaded and unpacked.')

sys.stdout.write()

Since print in Python 3 has the end keyword this matters less there, but in 2.x the only way to print without a trailing newline is to call the stdout object's write method directly; stdout has no notion of an end argument and its output does not append a newline. So if you want to keep writing to the same line, prepend '\r' to the string to move the cursor back to the start of the line.
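A minimal sketch of the same trick (the time.sleep call is only there so the overwriting is visible in a terminal):

import sys
import time

for pct in range(0, 101, 10):
    # '\r' moves the cursor back to the start of the line, so each write overwrites the previous message
    sys.stdout.write('\r- Progress: {0:3d}%'.format(pct))
    sys.stdout.flush()
    time.sleep(0.1)
sys.stdout.write('\n')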

urllib.request.urlretrieve(): a common way to fetch a URL

urllib.request.urlretrieve(url=url,
                          filename=file_path,
                          reporthook=_print_download_progress)
# The filename parameter gives the local save path (if omitted, urllib saves the data to a temporary file)
# The reporthook parameter is a callback you implement yourself; it fires when the connection is made and after each data block is transferred, receiving three arguments: (number of blocks received, block size, total file size)
# The data parameter is the data POSTed to the server
# The method returns a (filename, headers) tuple: filename is the local save path, headers is the server's response headers

Extraction: open the archive for reading first, then create the extracted files

if file_path.endswith('.zip'):
    zipfile.ZipFile(file=file_path, mode='r').extractall(dir)
elif file_path.endswith(('.tar.gz', '.tgz')):
    tarfile.open(name=file_path, mode='r:gz').extractall(dir)

  

Style_transfer.py

import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from PIL import Image
import vgg16

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

print('\nTensorFlow version:', tf.__version__, '\n')

vgg16.model_download()


"""圖像處理輔助函數"""

def load_image(file_name, max_size=None):
    '''Resize the image so that its longest side equals max_size, and convert it to a float32 array'''
    image = Image.open(file_name)
    if max_size is not None:
        factor = max_size/np.max(image.size)
        size = np.array(image.size)*factor
        size = size.astype(int)
        image = image.resize(size, Image.LANCZOS)   # Lanczos resampling: a high-quality filter for both down- and up-scaling
    return np.float32(image)

def save_image(image, file_name):
    '''Convert the array back to an image and save it'''
    # Clip pixel values to [0, 255] and convert to uint8
    image = np.clip(image, 0.0, 255.0).astype(np.uint8)
    # Save as a JPEG file
    with open(file_name, 'wb') as file:
        Image.fromarray(image).save(file, 'jpeg')

def plot_image(image):
    '''Display the image corresponding to the array'''
    image = np.clip(image, 0.0, 255.0).astype(np.uint8)
    plt.figure()
    plt.imshow(Image.fromarray(image))
    plt.show()

def plot_images(content_image, style_image, mixed_image):
    '''Display the three images side by side'''
    fig, axes = plt.subplots(1,3,figsize=(10,10))
    fig.subplots_adjust(hspace=0.1, wspace=0.1)

    ax = axes.flat[0]
    ax.imshow(content_image/255.0)
    ax.set_xlabel('Content')

    ax = axes.flat[1]
    ax.imshow(mixed_image/255.0)
    ax.set_xlabel('Mixed')

    ax = axes.flat[2]
    ax.imshow(style_image / 255.0)
    ax.set_xlabel('Style')

    # Remove the axis ticks
    for ax in axes.flat:
        ax.set_xticks([])
        ax.set_yticks([])
    plt.show()


"""損失函數"""

def mean_squared_error(a, b):
    '''
    Compute the mean squared error between two tensors
    :param a: tensor a (TensorFlow format)
    :param b: tensor b (TensorFlow format)
    :return: the mean squared error
    '''
    return tf.reduce_mean(tf.square(a-b))

def create_content_loss(session, model, content_image, layer_ids):
    '''
    Content loss
    :param session: tf Session object
    :param model: the model object
    :param content_image: the content image
    :param layer_ids: list of layer indices
    :return: the content loss tensor
    '''
    feed_dict = model.create_feed_dict(input_image=content_image)
    layers = model.get_layer_tensors(layer_ids)
    values = session.run(layers, feed_dict=feed_dict)

    with model.graph.as_default():
        layer_losses= []
        for value, layer in zip(values, layers):

            # print(layer, tf.constant(value))

            loss = mean_squared_error(layer, tf.constant(value))
            layer_losses.append(loss)
        total_loss = tf.reduce_mean(layer_losses)
        return total_loss

def gram_matrix(tensor):
    '''Build the Gram matrix: essentially the inner-product matrix of the activation feature vectors in a style layer'''
    shape = tensor.get_shape()
    factor = tf.reshape(tensor, shape=[-1, int(shape[3])])
    matrix = tf.matmul(factor, factor, transpose_a=True)
    return matrix

def create_style_loss(session, model, style_image, layer_ids):
    '''
    Style loss
    :param session: tf Session object
    :param model: the model object
    :param style_image: the style image
    :param layer_ids: list of layer indices
    :return: the style loss tensor
    '''
    feed_dict = model.create_feed_dict(input_image=style_image)
    layers = model.get_layer_tensors(layer_ids)
    gram_layers = [gram_matrix(layer) for layer in layers]

    # print(np.asarray(session.run(layers,feed_dict)[0]).shape)

    with model.graph.as_default():
        values = session.run(gram_layers, feed_dict=feed_dict)
        layer_losses = []
        for value, gram_layer in zip(values, gram_layers):
            loss = mean_squared_error(gram_layer, tf.constant(value))
            layer_losses.append(loss)
        total_loss = tf.reduce_mean(layer_losses)
    return total_loss

def create_denoise_loss(model):
    '''Denoising loss for the mixed image (total variation: sum of absolute differences between neighbouring pixels)'''
    loss = tf.reduce_sum(tf.abs(model.input_tensor[:,1:,:,:] - model.input_tensor[:,:-1,:,:])) + \
           tf.reduce_sum(tf.abs(model.input_tensor[:,:,1:,:] - model.input_tensor[:,:,:-1,:]))
    return loss


"""風格遷移"""

def style_transfer(content_image, style_image,
                   content_layer_ids, style_layer_ids,
                   weight_content=1.5, weight_style=10.0, weight_denoise=0.3,
                   num_iterations=120, learning_rate=10.0):
    '''
    Run gradient-descent style transfer and return the intermediate mixed images.
    :param content_image: the content image to learn from
    :param style_image: the style image to learn from
    :param content_layer_ids: layer indices used for content features
    :param style_layer_ids: layer indices used for style features
    :param weight_content: weight of the content loss
    :param weight_style: weight of the style loss
    :param weight_denoise: weight of the denoising loss
    :param num_iterations: number of iterations
    :param learning_rate: learning rate
    :return: the list of intermediate mixed images
    '''
    model = vgg16.VGG16()
    session = tf.Session(graph=model.graph)

    print('Content layers:\n',model.get_layer_names(content_layer_ids),'\n')
    print('Style layers:\n',model.get_layer_names(style_layer_ids),'\n')

    loss_content = create_content_loss(session, model, content_image, content_layer_ids)
    loss_style = create_style_loss(session, model, style_image, style_layer_ids)
    loss_denoise = create_denoise_loss(model)

    with model.graph.as_default():
        adj_content = tf.Variable(1e-10, name='adj_content')
        adj_style = tf.Variable(1e-10, name='adj_style')
        adj_denoise = tf.Variable(1e-10, name='adj_denoise')
        init = tf.global_variables_initializer()
    session.run(init)

    update_adj_content = adj_content.assign(1.0 / (loss_content+1e-10))
    update_adj_style = adj_style.assign(1.0 / (loss_style+1e-10))
    update_adj_denoise = adj_denoise.assign(1.0 / (loss_denoise+1e-10))

    loss = weight_content * adj_content * loss_content + \
           weight_style * adj_style * loss_style + \
           weight_denoise * adj_denoise * loss_denoise
    # tf.summary.scalar(loss, 'loss')
    # merge = tf.summary.merge_all()

    gradient = tf.gradients(loss, model.input_tensor)  # compute the gradient directly, rather than using an optimizer class

    run_list = [gradient, update_adj_content, update_adj_style, update_adj_denoise]
    mixed_image = np.random.rand(*content_image.shape) + 128

    '''
    from PIL import Image
    plt.imshow(Image.fromarray(np.uint8(mixed_image)))
    plt.savefig('{0}.png'.format([-1]),format='png')
    '''

    mixed_images = []
    for i in range(num_iterations):
        feed_dict = model.create_feed_dict(mixed_image)

        # layers = model.get_layer_tensors(style_layer_ids)
        # gram_layers = [gram_matrix(layer) for layer in layers]
        # print(np.asarray(session.run(gram_layers,feed_dict)[0]).shape)

        grad, adj_content_val, adj_style_val, adj_denoise_val = session.run(run_list, feed_dict=feed_dict)
        lr_step = learning_rate / (np.std(grad)+1e-8)
        mixed_image -= np.squeeze(np.asarray(grad) * lr_step)  # <------- lots of pitfalls here (see Detail 3 below)
        mixed_image = np.clip(mixed_image, 0.0, 255.0)
        print('.',end='')  # print one dot per iteration as a simple progress bar
        if (i%10 == 0) or (i == num_iterations-1):
            print('\n', 'Iteration:', i)
            print('Weight Adj. for Content: {0:.2e}, Style: {1:.2e}, Denoise: {2:.2e}'.format(
                adj_content_val, adj_style_val, adj_denoise_val))
            plot_images(content_image, style_image, mixed_image)
            mixed_images.append(mixed_image)
    print('\n', 'Final images:')
    plot_image(mixed_image)
    session.close()
    return mixed_images

if __name__=='__main__':
    content_filename = 'images/image_r.jpg'
    content_image = load_image(content_filename,max_size=None)
    style_filename = 'images/mosaic.jpg'
    style_image = load_image(style_filename,max_size=300)

    print('\nContent image size: ', content_image.shape,
          '\nStyle image size: ', style_image.shape, '\n')

    content_layer_ids = [4]
    style_layer_ids = list(range(13))
    img = style_transfer(content_image=content_image,
                         style_image=style_image,
                         content_layer_ids=content_layer_ids,
                         style_layer_ids=style_layer_ids,
                         weight_content=1.5,
                         weight_style=10.0,
                         weight_denoise=0.3,
                         num_iterations=300,
                         learning_rate=10.0)


"""
細節1:
這應該是個全卷積型網絡,由於沒有用到FC層所以不能肯定,但是至少feed部分沒有限制尺寸,
斯格拉姆矩陣利用[固定深度C,不定長L].dot([不定長L,固定深度C])=[C,C],忽略兩者feature尺寸的差距,所以理所當然的混合圖
大小和內容圖大小必須下相同。
細節2:
在loss函數建立的過程中,在靜態的graph中動態的構建了新的op,之所以這么說是在靜態loss函數op構建中使用了session得到了
中間的參量(也就是content和style的feature),而整個loss函數所在graph的目標輸入是mixed。
細節3:
在def style_transfer()中有一處標注了坑,這里面有一點突發狀況,新建的Variable節點被報錯不存在於默認圖中,經排查,這
是因為這幾個節點和model類中的節點不存在依賴關系,而model中的graph不是默認graph,所以要添加上下文環境使之進入model的
graph中。
細節3:
loss函數整合時有這樣的計算公式:權重*本次loss/上次loss,調整更新速度,感覺意義不大。
"""

"""
# 保存輸出圖像
from PIL import Image
for i in range(len(img)):
    image = Image.fromarray(np.uint8(img[i]))
    plt.imshow(image)
    plt.savefig('{0}.png'.format([i]),format='png')
"""

Note the gradient computation; it is surprisingly convenient:

tf.gradients(loss, model.input_tensor)  # compute the gradient directly, rather than using an optimizer class
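As a toy, self-contained illustration (not from the original post; TF 1.x) of why this is convenient: the quantity being optimized is fed in as a placeholder rather than stored in a tf.Variable, so the descent step happens outside the graph in plain Python, exactly like the update of mixed_image above.

import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[])
loss = tf.square(x - 3.0)
grad = tf.gradients(loss, x)[0]   # symbolic gradient w.r.t. the fed-in value

x_val = 0.0
with tf.Session() as sess:
    for _ in range(50):
        g = sess.run(grad, feed_dict={x: x_val})
        x_val -= 0.1 * g          # manual gradient-descent step outside the graph
print(x_val)                      # converges to ~3.0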

The results are mediocre. I think too many style layers were chosen; more weight should go to the high-level features and less to learning the low-level ones, otherwise the swirl structures of the style cannot be learned.
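One hedged tweak along those lines (not verified in the original post) would be to restrict the style layers to the deeper conv blocks when calling style_transfer:

# Emphasise deeper features when collecting style statistics
style_layer_ids = list(range(6, 13))   # conv3_3 .. conv5_3 in the 13-layer list defined in vgg16.py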


Recently (2017-09-19) I used this material for a class presentation, so I did some further exploration; a few takeaways are summarized below:

The lab machines are now more or less set up, so I will try to see whether better results can be obtained.

 

