Pytorch之驗證碼識別


本文主要實現了兩個工作:1.驗證碼生成   2.Pytorch識別驗證碼

 

一. 驗證碼生成

方法1. 利用PIL庫的ImageDraw實現繪圖,此法參考博客實現:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Mar 27 15:45:04 2018

@author: lps
"""

from PIL import Image, ImageDraw, ImageFont, ImageFilter
import random
import cv2
import numpy as np
import matplotlib.pyplot as plt
path = '/media/lps/python-3.5.2.amd64/Lib/site-packages/matplotlib/mpl-data/fonts/ttf/'     # directory holding the TTF font files (font selection)
data_path = '/home/lps/yanzm/'  # output root: label.txt and the src/ image folder are written under it

# random chr
def rndChar():
    """Return one random uppercase ASCII letter as a one-character string."""
    code_point = random.randint(65, 90)  # 65..90 are the code points of 'A'..'Z'
    return chr(code_point)

def rndInt():
    """Return one random decimal digit (0-9) as a one-character string."""
    digit = random.randint(0, 9)
    return str(digit)

def rndColor():
    """Return a random RGB triple with every channel drawn from 64..255."""
    red = random.randint(64, 255)
    green = random.randint(64, 255)
    blue = random.randint(64, 255)
    return (red, green, blue)

def rndColor2():
    """Return a random RGB triple with every channel drawn from 32..127."""
    red = random.randint(32, 127)
    green = random.randint(32, 127)
    blue = random.randint(32, 127)
    return (red, green, blue)

def gaussian_noise():
    """Return three Gaussian noise samples as a tuple of integers.

    The samples are drawn from a normal distribution with mean 125 and
    standard deviation 20 and truncated to integers. They are not clipped
    to any range.
    """
    mean, std_dev = 125, 20
    samples = np.random.normal(mean, std_dev, 3)
    return tuple(samples.astype(int))

def rotate(x, angle):
    """Rotate an image array about its centre, keeping the original size.

    Args:
        x: image as a NumPy array whose first two axes are (rows, cols).
        angle: rotation angle in degrees, passed to cv2.getRotationMatrix2D.

    Returns:
        The rotated image, with the same number of rows and columns as `x`.
    """
    rows, cols = x.shape[:2]
    # OpenCV takes points as (x, y) and sizes as (width, height), i.e.
    # (cols, rows). Passing (rows, cols) is only correct for square images.
    M_rotate = cv2.getRotationMatrix2D((cols / 2, rows / 2), angle, 1)
    return cv2.warpAffine(x, M_rotate, (cols, rows))
    
# Canvas geometry: four square 180-pixel tiles laid side by side, one per digit.
height = 180
width = 4 * height

def gen_image(num):
    """Generate `num` four-digit captcha images together with their labels.

    For every image:
      1. a `width` x `height` canvas is filled with random-colour pixels;
      2. one random digit is drawn into each of the four 180-pixel tiles;
      3. the digits, each followed by a space, are appended as one line to
         ``data_path + 'label.txt'``;
      4. the canvas is slightly blurred, every tile is rotated by a random
         angle in [-45, 45] degrees and its central 60x60 region is kept;
      5. the four crops are joined horizontally and saved as
         ``data_path + 'src/<index>.jpg'``.

    Args:
        num: number of images to generate; indices run from 0 to num - 1.
    """
    # The font is the same for every image, so load it once up front.
    font = ImageFont.truetype(path + 'cmb10.ttf', 36)

    for l in range(num):
        # Start from one wide canvas that holds all four digits.
        image = Image.new('RGB', (width, height), (255, 255, 255))
        draw = ImageDraw.Draw(image)

        # Background noise: every pixel gets its own random colour.
        for x in range(0, width):
            for y in range(0, height):
                draw.point((x, y), fill=rndColor())

        # One digit per tile, drawn just inside the region cropped below.
        label = []
        for t in range(4):
            numb = rndInt()
            draw.text((180 * t + 60 + 10, 60 + 10), numb, font=font, fill=rndColor2())
            label.append(numb)

        # Same line format as before: each digit followed by one space.
        with open(data_path + "label.txt", "a") as f:
            f.write(''.join(s + ' ' for s in label) + "\n")

        img = np.array(image.filter(ImageFilter.GaussianBlur(radius=0.5)))

        # Collect the crops in a list and join them once. Using `.any()` on
        # an accumulator array as an "is empty" test would discard the
        # accumulated tiles if they happened to be all zero.
        tiles = []
        for i in range(0, 4):
            img0 = img[:, 180 * i: 180 * i + 180]   # tile containing digit i
            angle = random.randint(-45, 45)
            img0 = rotate(img0, angle)               # random rotation per tile
            tiles.append(img0[60:120, 60:120, :])
        img1 = np.concatenate(tiles, axis=1)

        plt.imsave(data_path + 'src/' + str(l) + '.jpg', img1)
      
      
if __name__ == '__main__':
    # Script entry point: produce 100 sample captchas.
    gen_image(100)

    
View Code

結果大致:

 

方法2. 利用更專業的庫:captcha

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Mar 25 19:06:46 2018

@author: lps
"""
from captcha.image import ImageCaptcha
import numpy as np
#import matplotlib.pyplot as plt
from PIL import Image
import random 
import cv2

# Character sets a captcha can be drawn from, each as a list of
# single-character strings.
number = list('0123456789')
alphabet = list('abcdefghijklmnopqrstuvwxyz')
ALPHABET = list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')

# Output root: label.txt and the src/ image folder live under this path.
data_path = '/home/lps/yanzm/'

def random_captcha_text(char_set=number,captcha_size=4):
    """Return a list of `captcha_size` characters drawn at random from `char_set`.

    With the default `char_set` only digits are produced.
    """
    return [random.choice(char_set) for _ in range(captcha_size)]

def gen_capthcha_text_and_image(m):
    """Generate one captcha image and append its label to the label file.

    The image is saved as ``data_path + '/src/' + '%.4d.jpg' % m`` and the
    label (the characters separated by single spaces) is appended as one
    line to ``data_path + 'label.txt'``.

    Args:
        m: integer index of the sample; it determines the image file name.

    Raises:
        IOError: if OpenCV reports that the image could not be written.
    """
    generator = ImageCaptcha()
    # Space-separated characters form the label line.
    captcha_text = ' '.join(random_captcha_text())

    # NOTE(review): the space-separated string is also what gets rendered
    # into the image, exactly as before - confirm that this is intended.
    captcha = generator.generate(captcha_text)
    captcha_image = np.array(Image.open(captcha))

    # PIL delivers channels in RGB order while cv2.imwrite expects BGR;
    # convert first so red and blue are not swapped in the saved file.
    if captcha_image.ndim == 3 and captcha_image.shape[2] == 3:
        captcha_image = cv2.cvtColor(captcha_image, cv2.COLOR_RGB2BGR)

    # Write the image first and check the result: cv2.imwrite signals
    # failure through its return value, and a label without a matching
    # image would shift every later label.
    image_file = data_path + '/src/' + '%.4d.jpg' % m
    if not cv2.imwrite(image_file, captcha_image):
        raise IOError('could not write captcha image: ' + image_file)

    with open(data_path + "label.txt", "a") as f:
        f.write(captcha_text + "\n")
    
#    return captcha_text,captcha_image

if __name__ == '__main__':
    # Generate 5000 captchas, indexed 0 through 4999.
    for m in range(5000):
        gen_capthcha_text_and_image(m)
    
    
#    f = plt.figure()
#    ax = f.add_subplot(212)
#    ax.text(0.1,0.1,text,ha='center',va='center',transform=ax.transAxes)
#    plt.imshow(image)
#    plt.show()
#    
View Code

結果大致:

 

二. pytorch實現

         每一張驗證碼都作為一張獨立的圖片輸入網絡,輸出為四個數字,只有4個數字同時預測正確才算這張圖片預測正確。因此每一張圖對應四個多分類器(每個分類器有10個類別):驗證碼上面的數字為0-9,類似於mnist,只不過此時一張圖片對應於4個數字。思路很簡單,實現如下:

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Mar 30 15:46:09 2018

@author: lps
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable 
import torch.optim as optim
import torchvision.models as models
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils
import matplotlib.pyplot as plt
from PIL import Image
#import pandas as pd
import numpy as np
import os
import copy, time


file_path = '/home/lps/yanzm'  # data root: images are read from <file_path>/src, labels from <file_path>/label.txt
BATCH_SIZE = 16  # number of samples per mini-batch handed to the DataLoader
EPOCH = 10  # number of passes over the training data

# Load data
class dataset(Dataset):
    """Captcha dataset: numbered JPEG images plus one label row per image.

    Image ``idx`` is read from ``<root_dir>/<idx as 4 digits>.jpg`` and its
    label is row ``idx`` of the whitespace-separated label file.
    """

    def __init__(self, root_dir, label_file, transform=None):
        """Load the label table.

        Args:
            root_dir: directory that contains the image files.
            label_file: text file with one row of numeric labels per image.
            transform: optional callable applied to each loaded PIL image.
        """
        self.root_dir = root_dir
        # ndmin=2 keeps the table two-dimensional even when the file holds a
        # single line; otherwise loadtxt returns a 1-D array, __len__ would
        # report the number of characters and row indexing would fail.
        self.label = np.loadtxt(label_file, ndmin=2)
        self.transform = transform

    def __getitem__(self, idx):
        """Return ``(image, labels)`` for sample `idx`."""
        img_name = os.path.join(self.root_dir, '%.4d.jpg' % idx)
        image = Image.open(img_name)
        labels = self.label[idx, :]

        if self.transform:
            image = self.transform(image)

        return image, labels

    def __len__(self):
        """Return the number of label rows, i.e. the number of samples."""
        return self.label.shape[0]
            

# Dataset of captcha images, each converted to a tensor on load.
data = dataset(
    file_path + '/src',
    file_path + '/label.txt',
    transform=transforms.ToTensor(),
)

# Shuffled mini-batches; a trailing incomplete batch is dropped.
dataloader = DataLoader(
    data,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)

dataset_size = len(data)


# Conv network
class ConvNet(nn.Module):
    """Three conv blocks followed by two linear layers producing 40 scores.

    Every conv block is Conv2d -> BatchNorm2d -> LeakyReLU(0.2) -> MaxPool2d(2).
    For a (bs, 3, 60, 160) input the feature maps after the three blocks are
    (bs, 32, 30, 80), (bs, 64, 15, 40) and (bs, 64, 7, 20), which is why the
    first linear layer takes 64 * 7 * 20 inputs.
    """

    def __init__(self):
        super(ConvNet, self).__init__()

        # (in_channels, out_channels, kernel_size, padding) for each block
        block_specs = ((3, 32, 4, 2), (32, 64, 4, 2), (64, 64, 3, 1))
        layers = []
        for in_ch, out_ch, kernel, pad in block_specs:
            layers.append(nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=1, padding=pad))
            layers.append(nn.BatchNorm2d(out_ch))
            layers.append(nn.LeakyReLU(0.2, inplace=True))
            layers.append(nn.MaxPool2d(kernel_size=2))
        self.conv = nn.Sequential(*layers)

        self.fc1 = nn.Linear(64 * 7 * 20, 500)
        self.fc2 = nn.Linear(500, 40)

    def forward(self, x):
        """Return the (batch_size, 40) score matrix for image batch `x`."""
        features = self.conv(x)
        flat = features.view(features.size(0), -1)   # (batch_size, 64 * 7 * 20)
        # The two linear layers are applied back to back, with no activation
        # in between.
        return self.fc2(self.fc1(flat))


# Train the net
class nCrossEntropyLoss(torch.nn.Module):
    """Cross-entropy over `n` independent multi-class predictions per sample.

    The score matrix is read as `n` consecutive groups of `num_classes`
    columns, one group per captcha character. Every (sample, character)
    pair is treated as a separate classification example and a single
    cross-entropy is averaged over all of them.

    Args:
        n: number of characters predicted per sample.
        num_classes: number of classes per character.
    """

    def __init__(self, n=4, num_classes=10):
        super(nCrossEntropyLoss, self).__init__()
        self.n = n
        self.num_classes = num_classes
        self.total_loss = 0
        self.loss = nn.CrossEntropyLoss()

    def forward(self, output, label):
        """Return the mean cross-entropy over all characters of the batch.

        Args:
            output: (batch_size, >= n * num_classes) score tensor.
            label: (batch_size, >= n) tensor of class indices (any numeric
                dtype); it is cast to long and moved to `output`'s device.
        """
        width = self.n * self.num_classes
        # Row b * n + i holds the scores for character i of sample b, and the
        # flattened labels follow the same order. The mean does not depend on
        # row order, so this equals concatenating the per-character slices.
        logits = output[:, :width].reshape(-1, self.num_classes)
        targets = label[:, :self.n].long().to(output.device).reshape(-1)

        # Computed exactly once, so n == 1 also yields a tensor.
        self.total_loss = self.loss(logits, targets)
        return self.total_loss


def equal(np1,np2):
    """Count the rows of `np1` that match the same-index row of `np2` exactly.

    Args:
        np1: 2-D array; its row count decides how many rows are compared.
        np2: 2-D array compared row by row against `np1`.

    Returns:
        The number of row indices at which all elements are equal.
    """
    return sum(
        1
        for row in range(np1.shape[0])
        if (np1[row, :] == np2[row, :]).all()
    )
      

# Training script: optimise ConvNet with Adam on the captcha batches, keep
# the weights of the epoch with the highest training accuracy, and save
# them after the last epoch.
net = ConvNet().cuda()
optimizer = torch.optim.Adam(net.parameters(), lr=0.001)
loss_func = nCrossEntropyLoss()

best_model_wts = copy.deepcopy(net.state_dict())
best_acc = 0.0

since = time.time()
for epoch in range(EPOCH):

    running_loss = 0.0
    running_corrects = 0
    samples_seen = 0

    for step, (inputs, label) in enumerate(dataloader):

        inputs = inputs.cuda()   # (bs, 3, H, W) image batch
        label = label.cuda()     # (bs, 4) digit labels

        optimizer.zero_grad()

        output = net(inputs)     # (bs, 40): four groups of ten scores
        loss = loss_func(output, label)

        # Predicted digit per character = arg-max inside each group of ten
        # scores. log_softmax is monotonic, so it is not needed here.
        pred = output.data.view(output.size(0), 4, 10).max(2)[1].cpu()   # (bs, 4)

        loss.backward()
        optimizer.step()

        batch_count = inputs.size(0)
        # loss is a 0-dim tensor; .item() extracts the Python float
        # (indexing it with [0] fails on current PyTorch).
        running_loss += loss.item() * batch_count
        # A sample counts as correct only if all four digits match.
        running_corrects += equal(pred.numpy(), label.data.cpu().numpy().astype(int))
        samples_seen += batch_count

    # Average over the samples actually processed: with drop_last=True the
    # loader may skip a trailing partial batch, so len(dataset) can be larger.
    epoch_loss = running_loss / max(samples_seen, 1)
    epoch_acc = running_corrects / max(samples_seen, 1)

    if epoch_acc > best_acc:
        best_acc = epoch_acc
        best_model_wts = copy.deepcopy(net.state_dict())

    if epoch == EPOCH - 1:
        torch.save(best_model_wts, file_path + '/best_model_wts.pkl')

    print()

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Train Loss:{:.4f} Acc: {:.4f}'.format(epoch_loss, epoch_acc))
           
  

隨機生成5000張圖片拿來訓練,準確率也能達到97%左右。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM