ESRGAN is an enhanced network built on top of SRGAN; it won first place in the 2018 PIRM-SR challenge.
The Introduction points out that super-resolution results optimized for the PSNR metric tend to be over-smoothed and differ considerably from human subjective perception.
The authors propose three improvements over SRGAN:
First: the RRDB (Residual-in-Residual Dense Block) structure; stacking these densely connected blocks gives the network higher capacity and makes it easier to train.
Second: the RaGAN (relativistic average GAN) adversarial loss, which also takes the discriminator's score on real images into account when optimizing the generator.
Third: for the VGG perceptual (texture) loss, the features are compared using the outputs before the ReLU activation.
Because PSNR and SSIM diverge from human visual judgment, the authors also evaluate with the perceptual index metric used in the competition.
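For reference, the PIRM-SR challenge computes the perceptual index from two no-reference quality measures, Ma's score and NIQE; a lower PI means better perceptual quality. A minimal sketch, assuming the two scores have already been computed by their respective tools:

```python
def perceptual_index(ma_score: float, niqe_score: float) -> float:
    """PIRM-SR perceptual index: PI = 0.5 * ((10 - Ma) + NIQE); lower is better."""
    return 0.5 * ((10.0 - ma_score) + niqe_score)

# a high Ma score and a low NIQE score both push PI down (better perceptual quality)
print(perceptual_index(8.0, 3.0))  # 2.5
```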
The authors' first improvement is architectural: the generator is built from RRDB blocks.

RRDB code: start from class RRDBNet, then jump to class RRDB, and finally to class ResidualDenseBlock_5C.
```python
import functools
import torch
import torch.nn as nn
import torch.nn.functional as F
import models.modules.module_util as mutil


class ResidualDenseBlock_5C(nn.Module):
    def __init__(self, nf=64, gc=32, bias=True):
        super(ResidualDenseBlock_5C, self).__init__()
        # gc: growth channel, i.e. intermediate channels
        self.conv1 = nn.Conv2d(nf, gc, 3, 1, 1, bias=bias)
        self.conv2 = nn.Conv2d(nf + gc, gc, 3, 1, 1, bias=bias)
        self.conv3 = nn.Conv2d(nf + 2 * gc, gc, 3, 1, 1, bias=bias)
        self.conv4 = nn.Conv2d(nf + 3 * gc, gc, 3, 1, 1, bias=bias)
        self.conv5 = nn.Conv2d(nf + 4 * gc, nf, 3, 1, 1, bias=bias)
        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

        # initialization
        mutil.initialize_weights([self.conv1, self.conv2, self.conv3,
                                  self.conv4, self.conv5], 0.1)

    def forward(self, x):
        # dense connections: every conv sees all previous feature maps
        x1 = self.lrelu(self.conv1(x))
        x2 = self.lrelu(self.conv2(torch.cat((x, x1), 1)))
        x3 = self.lrelu(self.conv3(torch.cat((x, x1, x2), 1)))
        x4 = self.lrelu(self.conv4(torch.cat((x, x1, x2, x3), 1)))
        x5 = self.conv5(torch.cat((x, x1, x2, x3, x4), 1))
        return x5 * 0.2 + x  # residual scaling by 0.2


class RRDB(nn.Module):
    '''Residual in Residual Dense Block'''

    def __init__(self, nf, gc=32):
        super(RRDB, self).__init__()
        self.RDB1 = ResidualDenseBlock_5C(nf, gc)
        self.RDB2 = ResidualDenseBlock_5C(nf, gc)
        self.RDB3 = ResidualDenseBlock_5C(nf, gc)

    def forward(self, x):
        out = self.RDB1(x)
        out = self.RDB2(out)
        out = self.RDB3(out)
        return out * 0.2 + x


class RRDBNet(nn.Module):
    def __init__(self, in_nc, out_nc, nf, nb, gc=32):
        super(RRDBNet, self).__init__()
        RRDB_block_f = functools.partial(RRDB, nf=nf, gc=gc)

        self.conv_first = nn.Conv2d(in_nc, nf, 3, 1, 1, bias=True)  # first convolution
        self.RRDB_trunk = mutil.make_layer(RRDB_block_f, nb)  # stack nb RRDB residual blocks
        self.trunk_conv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)  # convolution after the trunk
        #### upsampling
        self.upconv1 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        self.upconv2 = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        self.HRconv = nn.Conv2d(nf, nf, 3, 1, 1, bias=True)
        self.conv_last = nn.Conv2d(nf, out_nc, 3, 1, 1, bias=True)

        self.lrelu = nn.LeakyReLU(negative_slope=0.2, inplace=True)

    def forward(self, x):
        fea = self.conv_first(x)
        trunk = self.trunk_conv(self.RRDB_trunk(fea))
        fea = fea + trunk  # long skip connection over the whole trunk

        fea = self.lrelu(self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
        fea = self.lrelu(self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
        out = self.conv_last(self.lrelu(self.HRconv(fea)))

        return out
```
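The snippet above relies on two helpers imported from models.modules.module_util. Below is a minimal sketch of what they do, based on the reference BasicSR/ESRGAN code (simplified to conv layers only), followed by a usage example; nf=64, nb=23 is the standard ESRGAN generator configuration:

```python
import torch
import torch.nn as nn
import torch.nn.init as init

def make_layer(block, n_layers):
    # stack n_layers copies of block() into a single sequential trunk
    return nn.Sequential(*[block() for _ in range(n_layers)])

def initialize_weights(net_l, scale=1):
    # Kaiming init, scaled down (scale=0.1 above) to stabilize residual training
    if not isinstance(net_l, list):
        net_l = [net_l]
    for net in net_l:
        for m in net.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, a=0, mode='fan_in')
                m.weight.data *= scale
                if m.bias is not None:
                    m.bias.data.zero_()

# usage sketch: 4x super-resolution via two nearest-neighbour 2x upsamples
net = RRDBNet(in_nc=3, out_nc=3, nf=64, nb=23)
print(net(torch.randn(1, 3, 32, 32)).shape)  # torch.Size([1, 3, 128, 128])
```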
The authors' second improvement: the RaGAN adversarial network, which uses relative realness probabilities for both the discriminator and the generator.
The generator's RaGAN loss is the average of two terms:
the loss for judging real images as fake relative to the fake images;
the loss for judging fake images as real relative to the real images.
```python
pred_g_fake = self.netD(self.fake_H)  # discriminator score for the generated image
if self.opt['train']['gan_type'] == 'gan':
    l_g_gan = self.l_gan_w * self.cri_gan(pred_g_fake, True)
elif self.opt['train']['gan_type'] == 'ragan':
    pred_d_real = self.netD(self.var_ref).detach()  # discriminator score for the real image
    l_g_gan = self.l_gan_w * (
        self.cri_gan(pred_d_real - torch.mean(pred_g_fake), False) +
        self.cri_gan(pred_g_fake - torch.mean(pred_d_real), True)) / 2
    # i.e. push the generated image to look more realistic than the real one
l_g_total += l_g_gan
```
The discriminator's RaGAN loss is the average of two terms:
the loss for judging real images as real relative to the fake images;
the loss for judging fake images as fake relative to the real images.
```python
pred_d_real = self.netD(self.var_ref)
pred_d_fake = self.netD(self.fake_H.detach())  # detach to avoid BP to G
if self.opt['train']['gan_type'] == 'gan':
    l_d_real = self.cri_gan(pred_d_real, True)
    l_d_fake = self.cri_gan(pred_d_fake, False)
    l_d_total = l_d_real + l_d_fake
elif self.opt['train']['gan_type'] == 'ragan':
    l_d_real = self.cri_gan(pred_d_real - torch.mean(pred_d_fake), True)
    l_d_fake = self.cri_gan(pred_d_fake - torch.mean(pred_d_real), False)
    l_d_total = (l_d_real + l_d_fake) / 2
```
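Both snippets call self.cri_gan; in the reference implementation this is a small GANLoss module that, for the 'gan'/'ragan' types, applies binary cross-entropy to the raw discriminator logits. A simplified sketch:

```python
import torch
import torch.nn as nn

class GANLoss(nn.Module):
    # simplified sketch: the repo version also supports other gan_type values
    def __init__(self, real_label_val=1.0, fake_label_val=0.0):
        super(GANLoss, self).__init__()
        self.real_label_val = real_label_val
        self.fake_label_val = fake_label_val
        self.loss = nn.BCEWithLogitsLoss()

    def forward(self, pred, target_is_real):
        # build a label tensor (all ones or all zeros) matching the prediction's shape
        val = self.real_label_val if target_is_real else self.fake_label_val
        target = torch.full_like(pred, val)
        return self.loss(pred, target)
```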
The authors' third improvement: when extracting texture details with VGG, use the output of the last convolution layer before its ReLU activation.
The authors visualize and compare the outputs of the last conv layer before and after ReLU, showing that the activated features are much sparser and lose detail.
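That observation can be reproduced with a quick experiment (a hypothetical sketch, not the authors' visualization code): extract VGG19 features just before and just after the conv5_4 ReLU and compare how many activations are exactly zero:

```python
import torch
import torchvision

vgg = torchvision.models.vgg19(pretrained=True).features.eval()
before_relu = torch.nn.Sequential(*list(vgg.children())[:35])  # up to conv5_4 (index 34)
after_relu = torch.nn.Sequential(*list(vgg.children())[:36])   # plus the ReLU at index 35

x = torch.rand(1, 3, 224, 224)  # any image tensor in [0, 1]
with torch.no_grad():
    sparsity_before = (before_relu(x) == 0).float().mean()
    sparsity_after = (after_relu(x) == 0).float().mean()
print(sparsity_before.item(), sparsity_after.item())  # post-ReLU features are far sparser
```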
The VGG feature loss is then added to the total generator loss:
```python
if self.cri_fea:  # feature loss
    real_fea = self.netF(self.var_H).detach()  # VGG features of the ground-truth image
    fake_fea = self.netF(self.fake_H)          # VGG features of the generated image
    l_g_fea = self.l_fea_w * self.cri_fea(fake_fea, real_fea)  # L1 loss between the feature maps
    l_g_total += l_g_fea
```
The feature extractor used here is VGG19 without batch normalization:
```python
import torch
import torch.nn as nn
import torchvision


class VGGFeatureExtractor(nn.Module):
    def __init__(self, feature_layer=34, use_bn=False, use_input_norm=True,
                 device=torch.device('cpu')):
        super(VGGFeatureExtractor, self).__init__()
        self.use_input_norm = use_input_norm
        if use_bn:
            model = torchvision.models.vgg19_bn(pretrained=True)
        else:
            model = torchvision.models.vgg19(pretrained=True)  # pretrained weights are needed for a meaningful feature loss
        if self.use_input_norm:
            mean = torch.Tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1).to(device)
            # [0.485 - 1, 0.456 - 1, 0.406 - 1] if input in range [-1, 1]
            std = torch.Tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1).to(device)
            # [0.229 * 2, 0.224 * 2, 0.225 * 2] if input in range [-1, 1]
            self.register_buffer('mean', mean)  # buffers move with the module but are not trained
            self.register_buffer('std', std)
        # keep the VGG19 layers up to and including feature_layer (34 = conv5_4, before its ReLU)
        self.features = nn.Sequential(*list(model.features.children())[:(feature_layer + 1)])
        # No need to BP to variable
        for k, v in self.features.named_parameters():
            v.requires_grad = False  # freeze the extractor so gradients never update it

    def forward(self, x):
        # Assume input range is [0, 1]
        if self.use_input_norm:
            x = (x - self.mean) / self.std
        output = self.features(x)
        return output
```
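A usage sketch continuing from the class above (tensor names are illustrative): feature_layer=34 selects the conv5_4 output before its ReLU, which is exactly the paper's third change, and the L1 distance between the two feature maps serves as the perceptual loss:

```python
device = torch.device('cpu')
netF = VGGFeatureExtractor(feature_layer=34, use_bn=False, device=device).to(device).eval()

sr = torch.rand(1, 3, 128, 128)  # generated image, values in [0, 1]
hr = torch.rand(1, 3, 128, 128)  # ground-truth image, values in [0, 1]
l_fea = nn.L1Loss()(netF(sr), netF(hr).detach())  # feature loss on pre-activation features
```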
Results
ESRGAN does not score particularly well on PSNR, but on the perceptual index it clearly stands out relative to other methods.