Generating Anime Avatars with DCGAN


1. First, obtain the anime images. Here, Python's Scrapy framework is used to crawl them; the images come from https://konachan.net/post?page=1. The key files (inside a standard Scrapy project skeleton, e.g. one created with scrapy startproject) are given below:

(1) items.py

# -*- coding: utf-8 -*-

# Define here the models for your scraped items
#
# See documentation in:
# https://docs.scrapy.org/en/latest/topics/items.html

import scrapy


class KonachanItem(scrapy.Item):
    # define the fields for your item here like:
    # name = scrapy.Field()
    img = scrapy.Field()

(2) spider.py (created by hand)

import scrapy
from ..items import KonachanItem


class MySpider(scrapy.spiders.Spider):
    name = "konachan"
    start_urls = ['https://konachan.net/post?page=1']

    def parse(self, response):
        # The second-to-last entry in the pagination bar is the last page number.
        base_url = 'https://konachan.net/post?page='
        page_info = response.xpath("//div[@class='pagination']/a/text()").extract()
        last_page = int(page_info[-2])

        for page in range(1, last_page + 1):
            url = base_url + str(page)
            yield scrapy.Request(url, callback=self.getImg)

    def getImg(self, response):
        imgs = response.xpath("//a[@class='thumb']/img/@src").extract()

        for img in imgs:
            item = KonachanItem()
            item['img'] = [img]  # must be a list, or ImagesPipeline raises an error
            yield item

(3) settings.py (only the modified lines are shown)

ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 300}

IMAGES_STORE = 'images'    # directory where downloaded files are saved
IMAGES_URLS_FIELD = 'img'  # must match the field defined in items.py

Finally, run the spider from the project root: scrapy crawl konachan.

Result: an images/full folder is created in the project root containing the crawled images, roughly 160,000+ of them.
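By default, ImagesPipeline stores each file under full/ with a name derived from the SHA1 hash of its URL. If you would rather keep the original file names, here is a minimal sketch of a pipeline subclass (the class name KonachanImagesPipeline is my own, and it assumes a recent Scrapy version where file_path accepts a keyword-only item argument); register it in ITEM_PIPELINES in place of the stock class:

# pipelines.py
from scrapy.pipelines.images import ImagesPipeline


class KonachanImagesPipeline(ImagesPipeline):
    def file_path(self, request, response=None, info=None, *, item=None):
        # Name each file after the last path segment of its URL
        # instead of the default SHA1 hash.
        return "full/" + request.url.split("/")[-1]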

 

2. Since what the DCGAN needs for training are face (avatar) crops, these raw images must be processed first. Here I use an OpenCV-based anime face detector from GitHub: https://github.com/nagadomi/lbpcascade_animeface

import cv2
import os.path
from glob import glob

def detect(filename, cascade_file="lbpcascade_animeface.xml"):
    if not os.path.isfile(cascade_file):
        raise RuntimeError("%s: not found" % cascade_file)

    cascade = cv2.CascadeClassifier(cascade_file)
    image = cv2.imread(filename)
    if image is None:  # skip files OpenCV cannot decode
        return
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    gray = cv2.equalizeHist(gray)

    faces = cascade.detectMultiScale(gray,
                                     # detector options
                                     scaleFactor=1.1,
                                     minNeighbors=5,
                                     minSize=(48, 48))
    for i, (x, y, w, h) in enumerate(faces):
        face = image[y: y + h, x: x + w, :]
        face = cv2.resize(face, (96, 96))
        save_filename = '%s-%d.jpg' % (os.path.basename(filename).split('.')[0], i)
        cv2.imwrite("faces/" + save_filename, face)


if __name__ == '__main__':
    if not os.path.exists('faces'):
        os.makedirs('faces')  # directory for the cropped face images
    file_list = glob('images/full/*.jpg')  # directory of the crawled images
    for filename in file_list:
        detect(filename)

Note: the lbpcascade_animeface.xml file must be present in the working directory.
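If you don't have that file yet, it lives in the repository linked above; a minimal way to fetch it (the raw URL below is an assumption on my part, so check the repo if the path has changed):

import urllib.request

# Assumed raw-file path in the nagadomi/lbpcascade_animeface repository.
url = ("https://raw.githubusercontent.com/nagadomi/"
       "lbpcascade_animeface/master/lbpcascade_animeface.xml")
urllib.request.urlretrieve(url, "lbpcascade_animeface.xml")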

Result: about 64,000+ cropped face images.

 

3. Now train a DCGAN on these face images, implemented with the PyTorch framework. Source on GitHub: https://github.com/eriklindernoren/PyTorch-GAN/blob/master/implementations/dcgan/dcgan.py (adjust the model configuration to your own needs).

import argparse
import os
import numpy as np

import torchvision.transforms as transforms
from torchvision.utils import save_image

from torchvision import datasets
from torch.autograd import Variable

import torch.nn as nn
import torch

os.makedirs("fake", exist_ok=True)  # directory for generated sample grids

parser = argparse.ArgumentParser()
parser.add_argument("--n_epochs", type=int, default=100, help="number of epochs of training")
parser.add_argument("--batch_size", type=int, default=64, help="size of the batches")
parser.add_argument("--lr", type=float, default=0.0002, help="adam: learning rate")
parser.add_argument("--b1", type=float, default=0.5, help="adam: decay of first order momentum of gradient")
parser.add_argument("--b2", type=float, default=0.999, help="adam: decay of first order momentum of gradient")
parser.add_argument("--n_cpu", type=int, default=8, help="number of cpu threads to use during batch generation")
parser.add_argument("--latent_dim", type=int, default=100, help="dimensionality of the latent space")
parser.add_argument("--img_size", type=int, default=96, help="size of each image dimension")
parser.add_argument("--channels", type=int, default=3, help="number of image channels")
opt = parser.parse_args()
print(opt)

cuda = True if torch.cuda.is_available() else False


def weights_init_normal(m):
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        torch.nn.init.normal_(m.weight.data, 0.0, 0.02)  # init the conv layer weights
    elif classname.find("BatchNorm2d") != -1:
        torch.nn.init.normal_(m.weight.data, 1.0, 0.02)  # init the batch-norm weights
        torch.nn.init.constant_(m.bias.data, 0.0)


class Generator(nn.Module):
    def __init__(self):
        super(Generator, self).__init__()

        self.init_size = opt.img_size // 4  
        self.l1 = nn.Sequential(nn.Linear(opt.latent_dim, 128 * self.init_size ** 2)) 

        self.conv_blocks = nn.Sequential(
            nn.BatchNorm2d(128),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 128, 3, stride=1, padding=1),
            nn.BatchNorm2d(128, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Upsample(scale_factor=2),
            nn.Conv2d(128, 64, 3, stride=1, padding=1),
            nn.BatchNorm2d(64, 0.8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(64, opt.channels, 3, stride=1, padding=1),
            nn.Tanh(),
        )

    def forward(self, z):
        out = self.l1(z)
        out = out.view(out.shape[0], 128, self.init_size, self.init_size)
        img = self.conv_blocks(out)
        return img


class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()

        def discriminator_block(in_filters, out_filters, bn=True):
            block = [nn.Conv2d(in_filters, out_filters, 3, 2, 1), nn.LeakyReLU(0.2, inplace=True), nn.Dropout2d(0.25)]
            if bn:
                block.append(nn.BatchNorm2d(out_filters, 0.8))
            return block

        self.model = nn.Sequential(
            *discriminator_block(opt.channels, 16, bn=False),
            *discriminator_block(16, 32),
            *discriminator_block(32, 64),
            *discriminator_block(64, 128),
        )

        # The height and width of downsampled image
        ds_size = opt.img_size // 2 ** 4
        self.adv_layer = nn.Sequential(nn.Linear(128 * ds_size ** 2, 1), nn.Sigmoid())

    def forward(self, img):
        out = self.model(img)
        out = out.view(out.shape[0], -1)
        validity = self.adv_layer(out)

        return validity


# Loss function
adversarial_loss = torch.nn.BCELoss()

# Initialize generator and discriminator
generator = Generator()
discriminator = Discriminator()

if cuda:
    generator.cuda()
    discriminator.cuda()
    adversarial_loss.cuda()

# Initialize weights
generator.apply(weights_init_normal)
discriminator.apply(weights_init_normal)

# Configure data loader
dataloader = torch.utils.data.DataLoader(
    datasets.ImageFolder(
        root="./faces",  # change this to the directory holding your training data
        transform=transforms.Compose(
            [transforms.Resize(opt.img_size), transforms.ToTensor(), transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
        ),
    ),
    batch_size=opt.batch_size,
    shuffle=True,
    drop_last=True,
)



# Optimizers
optimizer_G = torch.optim.Adam(generator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))
optimizer_D = torch.optim.Adam(discriminator.parameters(), lr=opt.lr, betas=(opt.b1, opt.b2))

Tensor = torch.cuda.FloatTensor if cuda else torch.FloatTensor

# ----------
#  Training
# ----------

for epoch in range(opt.n_epochs):
    for i, (imgs, _) in enumerate(dataloader):

        # Adversarial ground truths
        valid = Variable(Tensor(imgs.shape[0], 1).fill_(1.0), requires_grad=False)
        fake = Variable(Tensor(imgs.shape[0], 1).fill_(0.0), requires_grad=False)

        # Configure input
        real_imgs = Variable(imgs.type(Tensor))

        # -----------------
        #  Train Generator
        # -----------------

        optimizer_G.zero_grad()

        # Sample noise as generator input
        z = Variable(Tensor(np.random.normal(0, 1, (imgs.shape[0], opt.latent_dim))))

        # Generate a batch of images
        gen_imgs = generator(z)

        # Loss measures generator's ability to fool the discriminator
        g_loss = adversarial_loss(discriminator(gen_imgs), valid)  # push generated images toward the "real" label to fool D

        g_loss.backward()
        optimizer_G.step()

        # ---------------------
        #  Train Discriminator
        # ---------------------

        optimizer_D.zero_grad()

        # Measure discriminator's ability to classify real from generated samples
        real_loss = adversarial_loss(discriminator(real_imgs), valid)
        fake_loss = adversarial_loss(discriminator(gen_imgs.detach()), fake)
        d_loss = (real_loss + fake_loss) / 2

        d_loss.backward()
        optimizer_D.step()

        print(
            "[Epoch %d/%d] [Batch %d/%d] [D loss: %f] [G loss: %f]"
            % (epoch, opt.n_epochs, i, len(dataloader), d_loss.item(), g_loss.item())
        )

        # save a grid of generated samples at the end of each epoch
        if (i + 1) == len(dataloader):
            save_image(gen_imgs.data[:25], "fake/%d.png" % epoch, nrow=5, normalize=True)

Note: root in the DataLoader must point at the directory that contains your training data, and ImageFolder additionally requires one subdirectory inside it named after the class. For example, my training images are stored under "faces/real/", and root is set to "faces"; pointing root directly at the image files raises an error.
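One practical gap in the script above: it never saves the trained weights. Here is a minimal sketch of checkpointing plus a standalone sampling step, assuming the Generator class and opt from the script and a hypothetical file name generator.pth:

# At the end of training: persist the generator's weights (hypothetical path).
torch.save(generator.state_dict(), "generator.pth")

# Later, to sample new avatars without retraining:
generator = Generator()
generator.load_state_dict(torch.load("generator.pth", map_location="cpu"))
generator.eval()

with torch.no_grad():
    z = torch.randn(25, opt.latent_dim)  # CPU inference is fine here
    samples = generator(z)
    save_image(samples, "samples.png", nrow=5, normalize=True)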

Epoch 1: [sample grid image]

Epoch 10: [sample grid image]

Epoch 100: [sample grid image]

Hmm, the generated faces still look a bit off to me, and I'm not sure what went wrong; I'll tune the hyperparameters when I find time. If any expert can spot the mistake, please point it out, many thanks.
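For what it's worth, one commonly cited stabilization trick that might be worth trying is one-sided label smoothing (Salimans et al., 2016, "Improved Techniques for Training GANs"): train the discriminator against a real label of 0.9 instead of 1.0. A sketch of the change inside the training loop, not a confirmed fix for this run:

# One-sided label smoothing: use 0.9 as the "real" target when training D.
valid_smooth = Variable(Tensor(imgs.shape[0], 1).fill_(0.9), requires_grad=False)
real_loss = adversarial_loss(discriminator(real_imgs), valid_smooth)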

 

Note: the data collected by the crawler is used for learning only, not for commercial purposes; if this infringes on any rights, please let me know.

 

