使用 K-近鄰算法(KNN)識別數字加減法驗證碼


驗證碼如上所示

驗證碼圖片尺寸:100*30(像素)

下面咱們開始神奇的旅程

 批量下載驗證碼圖片,作為訓練數據集

此驗證碼比較簡單,所以只下載了 500 張

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author: zhibo.wang
# E-mail: gm.zhibo.wang@gmail.com
# Date  : 20/06/29 10:06:45
# Desc  :


import random
import requests
import uuid
import time
import os

def is_exists(path_):
    """Make sure the directory ``path_`` exists, creating parents as needed.

    Despite the name, nothing is reported back: the function returns ``None``
    once the directory is present.

    Raises:
        OSError: if the directory cannot be created (for example when
            ``path_`` already exists as a regular file).
    """
    # exist_ok=True removes the race between a separate existence check
    # and the creation call.
    os.makedirs(path_, exist_ok=True)


is_exists("source/")

# The request parameters never change, so build them once outside the loop.
url = 'https://www.okcis.cn/php/checkUser/code.php'
headers = {
    "user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/75.0.3770.90 Chrome/75.0.3770.90 Safari/537.36"}

# Download up to 500 captcha images into source/, one random file name each.
for i in range(0, 500):
    print(i)
    try:
        # Without a timeout a single stalled connection blocks the crawl forever.
        resp = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException as err:
        print("skip {0}: {1}".format(i, err))
        time.sleep(0.1)
        continue
    if resp.status_code != 200:
        # Do not store error pages as bogus .png training samples.
        print("skip {0}: HTTP {1}".format(i, resp.status_code))
        time.sleep(0.1)
        continue
    with open('source/' + str(uuid.uuid4()) + '.png', 'wb') as f:
        f.write(resp.content)
    # Small pause between requests to avoid hammering the server.
    time.sleep(0.1)

 二值化並切割驗證碼

 
         
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author: zhibo.wang
# E-mail: gm.zhibo.wang@gmail.com
# Date  : 20/06/29 10:06:45
# Desc  :

import os
import uuid
import numpy as np
from PIL import Image
import time


def is_exists(path_):
    """Make sure the directory ``path_`` exists, creating parents as needed.

    Returns ``None``; the name is historical.
    """
    # exist_ok=True removes the race between an existence check and creation.
    os.makedirs(path_, exist_ok=True)


is_exists("letters/")


def splitAndSave(path):
    """Binarise one captcha from ./source and save its three symbol tiles.

    Args:
        path: file name of the captcha image inside ``./source/``.

    The image is converted to greyscale, thresholded (pixels brighter than
    200 become 255, everything else 0) and cut into three 20-pixel-wide
    column ranges. Each tile is written to ``./letters/`` under a random
    UUID file name.
    """
    path = './source/' + path
    # Close the file handle as soon as the pixel data has been copied out.
    with Image.open(path) as img:
        pix = np.array(img.convert('L'))
    pix = (pix > 200) * 255

    # Column ranges [start, end) of the three symbols in the captcha.
    split_parts = [
        [3, 23],
        [23, 43],
        [43, 63]
    ]

    for left, right in split_parts:
        letter = pix[:, left:right]
        im = Image.fromarray(np.uint8(letter))
        save_path = './letters/' + str(uuid.uuid4()) + '.png'
        print('\t', save_path)
        im.save(save_path)


if __name__ == '__main__':
    # Process every .png (case-insensitive extension) found in ./source.
    im_paths = [fn for fn in os.listdir('./source')
                if os.path.splitext(fn)[1].lower() == '.png']
    for im_path in im_paths:
        print(im_path)
        splitAndSave(im_path)

給切割好的數據打標籤

每次選中同一類別的圖片,複製粘貼到 train 文件夾下,然後修改腳本中的 n(該類別的標籤),運行腳本即可把這批文件自動重命名為「n:序號.png」;每個類別重複一次。

#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author: zhibo.wang
# E-mail: gm.zhibo.wang@gmail.com
# Date  : 20/06/29 11:36:11
# Desc  :

import os

def list_all_files(rootdir):
    """Recursively collect the paths of all regular files under ``rootdir``.

    Args:
        rootdir: directory to scan.

    Returns:
        A list of file paths, each built with ``os.path.join`` from
        ``rootdir`` downwards. Entries appear in ``os.listdir`` order, with
        the contents of a sub-directory inserted where that directory was
        listed. A directory that cannot be listed (missing, not a directory,
        permission denied) contributes no entries instead of raising.
    """
    _files = []
    try:
        entries = os.listdir(rootdir)
    except OSError:
        # Best effort: an unreadable directory is treated as empty. Only
        # OSError is caught so that genuine programming errors still surface.
        return _files
    for entry in entries:
        path = os.path.join(rootdir, entry)
        if os.path.isdir(path):
            _files.extend(list_all_files(path))
        elif os.path.isfile(path):
            _files.append(path)
    return _files


def rename_filename(filename, newfilename):
    """Rename (move) the file at ``filename`` to ``newfilename`` via ``os.rename``."""
    os.rename(src=filename, dst=newfilename)



def run(n="10", path_='train'):
    """Label the still-unlabelled tiles under ``path_`` with class ``n``.

    Args:
        n: class label written in front of the ':' in the new file name.
        path_: directory that holds the tiles to label.

    Only files whose base name is exactly 40 characters long are renamed;
    that is the length of a UUID string plus ".png", i.e. the names produced
    by the splitting script. Each one is renamed to ``<path_>/<n>:<index>.png``.
    """
    # basename handles the platform's path separator; splitting on "/" does not.
    all_files = [f for f in list_all_files(path_)
                 if len(os.path.basename(f)) == 40]
    index = 0
    for file_name in all_files:
        # Skip indices already taken by an earlier run with the same label,
        # because os.rename would silently overwrite them on POSIX.
        while True:
            # Windows file names cannot contain ':'; change the separator
            # yourself there (the training script splits labels on ':').
            new_file_name = "{0}/{1}:{2}.png".format(path_, n, index)
            index += 1
            if not os.path.exists(new_file_name):
                break
        print(new_file_name)
        rename_filename(file_name, new_file_name)


if __name__ == "__main__":
    run()

數據打標籤完成,開始訓練模型

訓練數據 knn

 

 
         
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author: zhibo.wang
# E-mail: gm.zhibo.wang@gmail.com
# Date  : 20/06/29 10:06:45
# Desc  :

import os
from PIL import Image
import numpy as np
import joblib
from sklearn.neighbors import KNeighborsClassifier
# NOTE: the original also had `from utils import list_all_files`. It is
# dropped here because the function is defined right below (which shadowed
# the import anyway) and the import fails when no `utils` module exists.


def list_all_files(rootdir):
    """Recursively collect the paths of all regular files under ``rootdir``.

    A directory that cannot be listed contributes no entries instead of
    raising.
    """
    _files = []
    try:
        entries = os.listdir(rootdir)
    except OSError:
        # Best effort: treat an unreadable directory as empty.
        return _files
    for entry in entries:
        path = os.path.join(rootdir, entry)
        if os.path.isdir(path):
            _files.extend(list_all_files(path))
        elif os.path.isfile(path):
            _files.append(path)
    return _files


def load_dataset():
    """Load the labelled tiles under ``train`` as feature vectors and labels.

    A file belongs to class ``k`` when the part of its base name before the
    first ':' equals ``k``. The classes are "1" .. "10", "+" and "-".

    Returns:
        ``(X, y)`` as NumPy arrays. Each row of ``X`` is a greyscale tile
        flattened to 20*30 = 600 values, and ``y`` holds the matching label
        strings. Samples are grouped by class in the order listed above.
    """
    X = []
    y = []
    path_ = "train"
    all_files = list_all_files(path_)
    keys = [str(i) for i in range(1, 11)] + ["+", "-"]
    for k in keys:
        for file_ in all_files:
            # basename handles the platform's path separator.
            if os.path.basename(file_).split(":")[0] == k:
                with Image.open(file_) as img:
                    pix = np.asarray(img.convert("L"))
                X.append(pix.reshape(20*30))
                y.append(k)
    return np.asarray(X), np.asarray(y)


if __name__ == "__main__":
    X, y = load_dataset()
    if len(X) == 0:
        # Fail with a clear message instead of an obscure error from fit().
        raise SystemExit("no labelled images found under 'train'")
    # Fit a KNN classifier with scikit-learn's default settings and save it.
    knn = KNeighborsClassifier()
    knn.fit(X, y)
    joblib.dump(knn, 'knn.pkl')

訓練完成,接下來測試一下效果

 
         
#!/usr/bin/env python
# -*- coding:utf-8 -*-

# Author: zhibo.wang
# E-mail: gm.zhibo.wang@gmail.com
# Date  : 20/06/29 10:06:45
# Desc  :


import os
from functools import lru_cache
import numpy as np
from PIL import Image
import joblib


def split_letters(path):
    """Binarise the captcha at ``path`` and return its three symbol tiles.

    The image is converted to greyscale and thresholded (pixels brighter
    than 200 become 255, everything else 0). It is then cut into three
    20-pixel-wide column ranges.

    Returns:
        A list of three arrays, each tile flattened to 20*30 = 600 values.
    """
    # Close the file handle as soon as the pixel data has been copied out.
    with Image.open(path) as img:
        pix = np.array(img.convert('L'))
    pix = (pix > 200) * 255

    # Column ranges [start, end) of the three symbols in the captcha.
    split_parts = [
        [3, 23],
        [23, 43],
        [43, 63]
    ]
    return [pix[:, left:right].reshape(20*30) for left, right in split_parts]


@lru_cache(maxsize=8)
def _load_model(model_path):
    """Load a model file once and reuse it for later calls with the same path."""
    # SECURITY: joblib.load is pickle-based and can execute arbitrary code;
    # only load model files you created yourself or otherwise trust.
    return joblib.load(model_path)


def get_captcha_result(model_path, filename):
    """Recognise the captcha image ``filename`` with the model at ``model_path``.

    Returns:
        The three predicted labels joined into a single string.
    """
    sipo_knn = _load_model(model_path)
    letters = split_letters(filename)
    return "".join([str(i) for i in sipo_knn.predict(letters)])


if __name__ == "__main__":
    # Print (file name, prediction) for every entry in ./test.
    for test in os.listdir('./test'):
        datas = test, get_captcha_result('knn.pkl', './test/' + test)
        print(datas)

 

結果挺滿意的,百分百識別正確

接下來就是對識別出的算式進行簡單的計算了,此處略。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM