本文主要實現了兩個工作:1.驗證碼生成 2.PyTorch識別驗證碼
一. 驗證碼生成
方法1. 利用PIL庫的ImageDraw
實現繪圖,此法參考博客實現:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 27 15:45:04 2018

Hand-rolled 4-digit captcha generator using PIL: fill a wide canvas with
bright noise pixels, draw one random digit per 180-px cell, blur, then cut
out each cell, rotate it randomly, and concatenate the 60x60 crops into
the final image. Labels are appended to label.txt, one line per image.

@author: lps
"""
from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt

# Font directory and output directory (machine-specific paths; adjust locally).
path = '/media/lps/python-3.5.2.amd64/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/'
data_path = '/home/lps/yanzm/'


def rndChar():
    """Return a random uppercase ASCII letter (currently unused by gen_image)."""
    return chr(random.randint(65, 90))


def rndInt():
    """Return a random digit '0'-'9' as a one-character string."""
    return str(random.randint(0, 9))


def rndColor():
    """Bright random RGB triple used for the background noise pixels."""
    return (random.randint(64, 255), random.randint(64, 255), random.randint(64, 255))


def rndColor2():
    """Dark random RGB triple used for the digit glyphs (contrasts with noise)."""
    return (random.randint(32, 127), random.randint(32, 127), random.randint(32, 127))


def gaussian_noise():
    """Gaussian-distributed RGB triple (currently unused by gen_image)."""
    mu = 125
    sigma = 20
    return tuple(np.random.normal(mu, sigma, 3).astype(int))


def rotate(x, angle):
    """Rotate image array `x` around its center by `angle` degrees.

    NOTE(review): cv2 centers/sizes are (width, height); this passes
    (shape[0], shape[1]) which is only correct because the crops are square.
    """
    M_rotate = cv2.getRotationMatrix2D((x.shape[0] / 2, x.shape[1] / 2), angle, 1)
    return cv2.warpAffine(x, M_rotate, (x.shape[0], x.shape[1]))


width = 180 * 4  # four 180-px cells, one digit per cell
height = 180


def gen_image(num):
    """Generate `num` captcha images into data_path/src and append labels.

    Each output image is the horizontal concatenation of four rotated
    60x60 digit crops; its label line is the four digits space-separated.
    """
    for l in range(num):
        # White canvas covered with bright noise pixels.
        image = Image.new('RGB', (width, height), (255, 255, 255))
        font = ImageFont.truetype(path + 'cmb10.ttf', 36)
        draw = ImageDraw.Draw(image)
        for x in range(width):
            for y in range(height):
                draw.point((x, y), fill=rndColor())

        # One random digit per 180-px cell, offset so the 60:120 crop hits it.
        label = []
        for t in range(4):
            numb = rndInt()
            draw.text((180 * t + 60 + 10, 60 + 10), numb, font=font, fill=rndColor2())
            label.append(numb)
        with open(data_path + "label.txt", "a") as f:
            for s in label:
                f.write(s + ' ')
            f.writelines("\n")

        # Blur, slice out each digit cell, rotate it randomly, and stitch
        # the central 60x60 windows together.
        img = np.array(image.filter(ImageFilter.GaussianBlur(radius=0.5)))
        img1 = np.array([])
        for i in range(4):
            img0 = img[:, 180 * i: 180 * i + 180]
            angle = random.randint(-45, 45)
            img0 = rotate(img0, angle)
            # BUGFIX: test emptiness with .size, not .any() — .any() would
            # misclassify an all-zero (black) first crop as "empty" and
            # silently overwrite it instead of concatenating.
            if img1.size:
                img1 = np.concatenate((img1, img0[60:120, 60:120, :]), axis=1)
            else:
                img1 = img0[60:120, 60:120, :]
        plt.imsave(data_path + 'src/' + str(l) + '.jpg', img1)


if __name__ == '__main__':
    gen_image(100)
結果大致:
方法2. 利用更專業的庫:captcha

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 25 19:06:46 2018

Captcha generation via the `captcha` library: render a 4-digit text,
append the label to label.txt, and save the image as %04d.jpg.

@author: lps
"""
from captcha.image import ImageCaptcha
import numpy as np
from PIL import Image
import random
import cv2

number = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
alphabet = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
            'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']
ALPHABET = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
            'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z']

data_path = '/home/lps/yanzm/'


def random_captcha_text(char_set=number, captcha_size=4):
    """Pick `captcha_size` random characters from `char_set` (digits only
    by default) and return them as a list."""
    captcha_text = []
    for i in range(captcha_size):
        captcha_text.append(random.choice(char_set))
    return captcha_text


def gen_capthcha_text_and_image(m):
    """Generate captcha image #m, record its label, and save it to disk.

    NOTE(review): the function name has a typo ('capthcha'); kept as-is
    because it is part of the module's public interface.
    """
    image = ImageCaptcha()
    captcha_text = ' '.join(random_captcha_text())  # label: digits space-separated
    captcha = image.generate(captcha_text)
    captcha_image = np.array(Image.open(captcha))   # RGB array from PIL
    with open(data_path + "label.txt", "a") as f:
        f.write(captcha_text)
        f.writelines("\n")
    # BUGFIX: cv2.imwrite expects BGR channel order; writing the PIL RGB
    # array directly swaps red and blue in the saved file. Convert first.
    cv2.imwrite(data_path + '/src/' + '%.4d.jpg' % m,
                cv2.cvtColor(captcha_image, cv2.COLOR_RGB2BGR))


if __name__ == '__main__':
    for m in range(0, 5000):
        gen_capthcha_text_and_image(m)
結果大致:
二. pytorch實現
對於一張驗證碼來說作為一張單一的圖片,每輸入一張圖片,得到四個數字作為輸出,只有4個數字同時預測正確才表示預測正確。所以在每一張圖上是四個多分類器:因為驗證碼上面的數字為0-9,類似於mnist,只不過此時一張圖片對應於4個數字。所以思路很簡單,實現如下:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 30 15:46:09 2018

Train a small CNN to recognise 4-digit captchas. The net emits 40 logits
per image = four independent 10-way classifiers (one per digit position);
a sample counts as correct only when all four digits are predicted right.

@author: lps
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
import torch.optim as optim
import torchvision.models as models
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np
import os
import copy, time

file_path = '/home/lps/yanzm'
BATCH_SIZE = 16
EPOCH = 10


# Load data
class dataset(Dataset):
    """Captcha dataset: images named %04d.jpg in root_dir, one row of four
    space-separated digits per image in label_file."""

    def __init__(self, root_dir, label_file, transform=None):
        self.root_dir = root_dir
        self.label = np.loadtxt(label_file)  # (N, 4) float array of digits
        self.transform = transform

    def __getitem__(self, idx):
        img_name = os.path.join(self.root_dir, '%.4d.jpg' % idx)
        image = Image.open(img_name)
        labels = self.label[idx, :]  # the 4 digit labels for this image
        if self.transform:
            image = self.transform(image)
        return image, labels

    def __len__(self):
        return self.label.shape[0]


data = dataset(file_path + '/src', file_path + '/label.txt',
               transform=transforms.ToTensor())
dataloader = DataLoader(data, batch_size=BATCH_SIZE, shuffle=True,
                        num_workers=4, drop_last=True)
dataset_size = len(data)


# Conv network
class ConvNet(nn.Module):
    """Three conv/BN/LeakyReLU/pool stages followed by two FC layers that
    map a (3, 60, 160) image to 40 logits (4 digits x 10 classes)."""

    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=4, stride=1, padding=2),   # in: (bs,3,60,160)
            nn.BatchNorm2d(32),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2),                            # out: (bs,32,30,80)
            nn.Conv2d(32, 64, kernel_size=4, stride=1, padding=2),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2),                            # out: (bs,64,15,40)
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.LeakyReLU(0.2, inplace=True),
            nn.MaxPool2d(kernel_size=2)                             # out: (bs,64,7,20)
        )
        self.fc1 = nn.Linear(64 * 7 * 20, 500)
        self.fc2 = nn.Linear(500, 40)

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)  # flatten to (batch_size, 64*7*20)
        output = self.fc1(x)
        output = self.fc2(output)
        return output


# Train the net
class nCrossEntropyLoss(torch.nn.Module):
    """Cross-entropy over n digit positions: slice the 40 logits into n
    10-way problems, stack them along the batch axis, apply CE once."""

    def __init__(self, n=4):
        super(nCrossEntropyLoss, self).__init__()
        self.n = n
        self.total_loss = 0
        self.loss = nn.CrossEntropyLoss()

    def forward(self, output, label):
        # IMPROVED: cast on-device instead of the original GPU->numpy->GPU
        # round-trip (Variable(torch.LongTensor(label.data.cpu().numpy())).cuda()).
        label = label.long()
        output_t = output[:, 0:10]
        label_t = label[:, 0]
        for i in range(1, self.n):
            output_t = torch.cat((output_t, output[:, 10 * i:10 * i + 10]), 0)
            label_t = torch.cat((label_t, label[:, i]), 0)
        self.total_loss = self.loss(output_t, label_t)
        return self.total_loss


def equal(np1, np2):
    """Count rows where ALL four predicted digits match the label row.

    IMPROVED: vectorized replacement for the original per-row Python loop.
    """
    return int((np1 == np2).all(axis=1).sum())


net = ConvNet().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
loss_func = nCrossEntropyLoss()

best_model_wts = copy.deepcopy(net.state_dict())
best_acc = 0.0
since = time.time()

for epoch in range(EPOCH):
    running_loss = 0.0
    running_corrects = 0
    for step, (inputs, label) in enumerate(dataloader):
        # Dummy first column; stripped off with [:, 1:] when scoring below.
        pred = torch.LongTensor(BATCH_SIZE, 1).zero_()
        inputs = Variable(inputs).cuda()   # (bs, 3, 60, 240)
        label = Variable(label).cuda()     # (bs, 4)
        optimizer.zero_grad()
        output = net(inputs)               # (bs, 40)
        loss = loss_func(output, label)
        for i in range(4):
            # Per-position argmax over each 10-logit slice.
            pre = F.log_softmax(output[:, 10 * i:10 * i + 10], dim=1)  # (bs, 10)
            pred = torch.cat((pred, pre.data.max(1, keepdim=True)[1].cpu()), dim=1)
        loss.backward()
        optimizer.step()
        # BUGFIX/modernisation: `loss.data[0]` was removed in PyTorch >= 0.5;
        # `loss.item()` is the supported scalar accessor.
        running_loss += loss.item() * inputs.size(0)
        running_corrects += equal(pred.numpy()[:, 1:],
                                  label.data.cpu().numpy().astype(int))
    epoch_loss = running_loss / dataset_size
    epoch_acc = running_corrects / dataset_size
    # IMPROVED: report every epoch instead of only after the final one.
    print('Train Loss: {:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
    if epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(net.state_dict())
    if epoch == EPOCH - 1:
        torch.save(best_model_wts, file_path + '/best_model_wts.pkl')

print()
time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(
    time_elapsed // 60, time_elapsed % 60))
隨機生成5000張圖片拿來訓練,準確率也會有97%左右。