前言
这段代码是在网上找的,经过一番修改之后发现还是蛮好用的。识别成功率在60%左右,虽然有点低,但相对来说已经算不错了。
import re
from PIL import Image
import pytesseract

# Matches every character that is NOT a CJK ideograph (U+4E00-U+9FA5),
# an ASCII digit or an ASCII letter; used to strip OCR noise.
_NON_CAPTCHA_CHARS = re.compile(u"([^\u4e00-\u9fa5\u0030-\u0039\u0041-\u005a\u0061-\u007a])")


def _binarize(image, threshold):
    """Return a copy of grayscale *image* with values below *threshold* set to 0 (black) and all others to 255 (white)."""
    return image.point(lambda value: 0 if value < threshold else 255)


def _remove_isolated_pixels(image):
    """Turn dark pixels that have no dark 4-neighbour white, modifying *image* in place."""
    pixels = image.load()
    width, height = image.size
    # Border pixels are skipped because they lack a full set of neighbours.
    # The scan is column-major and edits are visible to later iterations,
    # matching the order in which pixels are cleared.
    for x in range(1, width - 1):
        for y in range(1, height - 1):
            if pixels[x, y] >= 50:
                continue
            neighbours = (
                pixels[x, y - 1],  # top
                pixels[x - 1, y],  # left
                pixels[x, y + 1],  # bottom
                pixels[x + 1, y],  # right
            )
            if not any(value < 10 for value in neighbours):
                pixels[x, y] = 255


def get_pictures(
    driver,
    screenshot_path=r'D:\Honest\picture\poo1.png',
    captcha_path=r'D:\Honest\picture\poo2.png',
    element_id='codeImg',
    threshold=190,
    code_length=4,
):
    """Recognise the CAPTCHA shown on the current page and return its text.

    A full-page screenshot is saved, the CAPTCHA element is cropped out of it
    and saved as well, then the crop is converted to grayscale, binarised,
    cleaned of isolated dark pixels and passed to pytesseract.

    Args:
        driver: Selenium WebDriver whose current page contains the CAPTCHA.
        screenshot_path: Where the full-page screenshot is written.
        captcha_path: Where the cropped CAPTCHA image is written.
        element_id: DOM id of the CAPTCHA image element.
        threshold: Grayscale value below which a pixel becomes black.
        code_length: Maximum number of characters to return.

    Returns:
        The first ``code_length`` characters of the OCR result after removing
        everything except CJK ideographs, digits and ASCII letters. May be
        shorter (or empty) when recognition yields fewer characters.
    """
    driver.save_screenshot(screenshot_path)

    # find_element(by, value) works on both Selenium 3 and Selenium 4;
    # the find_element_by_* helpers no longer exist in Selenium 4.3+.
    element = driver.find_element("id", element_id)
    left = element.location['x']
    top = element.location['y']
    right = element.size['width'] + left
    bottom = element.size['height'] + top

    # NOTE(review): the crop box assumes screenshot pixels map 1:1 to the
    # element coordinates reported by the driver; on a scaled display the
    # box may be off - confirm on the target machine.
    with Image.open(screenshot_path) as page:
        captcha = page.crop((left, top, right, bottom))
    captcha.save(captcha_path)

    cleaned = _binarize(captcha.convert("L"), threshold)
    _remove_isolated_pixels(cleaned)

    raw_text = pytesseract.image_to_string(cleaned)
    filtered = _NON_CAPTCHA_CHARS.sub("", raw_text)
    return filtered[:code_length]
调用例子:
from selenium import webdriver
from common.common_verification import get_pictures  # module that defines get_pictures


def test_a():
    """Log in to the demo site, filling in the CAPTCHA recognised by get_pictures."""
    driver = webdriver.Chrome()
    try:
        # driver.get needs a fully qualified URL, including the scheme.
        driver.get("http://www.123.com")
        # Account name
        driver.find_element("name", 'loginname').send_keys('123')
        # Password
        driver.find_element("name", 'password').send_keys('123')
        # CAPTCHA: pass the local driver (there is no ``self`` in a plain
        # function) and type the recognised text into the input field.
        code = get_pictures(driver)
        driver.find_element("id", 'code').send_keys(code)
    finally:
        # Always close the browser, even when a step above fails.
        driver.quit()


if __name__ == '__main__':
    test_a()