Infi-chu:
http://www.cnblogs.com/Infi-chu/
一、圖形驗證碼識別
1.使用tesserocr
# Fixed: the two imports were fused onto a single line, which is a syntax error.
import tesserocr
from PIL import Image

# Open a locally stored CAPTCHA image to test with.
image = Image.open('test.jpg')
# Run OCR on the opened image and print the recognized text.
result = tesserocr.image_to_text(image)
print(result)
# Recognize the text directly from the image file path, without opening it with PIL first.
import tesserocr

recognized = tesserocr.file_to_text('test.jpg')
print(recognized)
2.處理驗證碼圖片
convert()方法,可將圖片轉化為灰度圖像、二值化圖像
image = image.convert('L') # Convert the image to grayscale (mode 'L')
image.show()
image = image.convert('1') # Convert the image to a bilevel (mode '1') image. NOTE(review): the original note says the default binarization threshold is 127; Pillow dithers by default for mode '1' — confirm against the Pillow docs
# First convert the image to grayscale, then binarize it with a custom threshold.
image = image.convert('L')
threshold = 80  # gray levels below this value map to 0, all others to 1
# 256-entry lookup table handed to point(): one output value per possible gray level.
table = [0 if level < threshold else 1 for level in range(256)]
image = image.point(table, '1')
image.show()  # original note: the image looks clearer after thresholding
result = tesserocr.image_to_text(image)
print(result)
二、滑動驗證碼識別
滑動驗證碼就如同用一塊拼圖去在圖片中填充
1.滑動驗證碼特點:
防模擬
防偽造
防暴力
2.如何識別:
采用瀏覽器模擬驗證
3.初始化:
EMAIL = 'test@test.com'
PASSWORD = '123456'


class CrackGeetest():
    """Browser session plus login credentials for the Geetest login page."""

    def __init__(self):
        """Start a Chrome WebDriver and store the URL, wait helper and credentials."""
        self.url = 'https://account.geetest.com/login'
        # Fixed: the driver class is ``webdriver.Chrome`` (was misspelled ``Chome``).
        self.browser = webdriver.Chrome()
        # Explicit-wait helper bound to this browser, constructed with a timeout of 20.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        # Fixed: the attribute was misspelled ``pasword``.
        self.password = PASSWORD
4.模擬點擊:
# Locate the verification button
def get_geetest_button(self):
    """Wait until the element with class ``geetest_radar_tip`` is clickable and return it."""
    # Fixed: the locator helper is ``By`` (was ``BY``), matching the other methods.
    button = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))
    return button
# Click the verification button.
# NOTE(review): this fragment uses `self`, so it presumably belongs inside a CrackGeetest method — confirm.
button = self.get_geetest_button()
button.click()
5.識別缺口:
首先需要取得原圖和帶缺口的圖片以便對比:利用selenium選取圖片元素,得到其位置和size,然後獲取截圖
#
# Get the position and size of the CAPTCHA image
def position(self):
    """Return the page coordinates of the CAPTCHA image element.

    Waits for the element with class ``geetest_canvas_img`` to be present,
    then reads its ``location`` and ``size``.

    Returns:
        A ``(top, bottom, left, right)`` tuple computed from the element's
        ``y``/``x`` location and its ``height``/``width``.
    """
    # Fixed: the expected condition is ``presence_of_element_located``
    # (was misspelled ``persence_of_element_located``).
    img = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
    time.sleep(2)  # fixed pause before reading geometry (presumably lets the image finish rendering)
    location = img.location
    size = img.size
    top = location['y']
    bottom = location['y'] + size['height']
    left = location['x']
    right = location['x'] + size['width']
    return (top, bottom, left, right)


# get_geetest_image() calls ``self.get_position()``, so expose the method under
# that name as well while keeping ``position`` for existing callers.
get_position = position
# Capture the CAPTCHA image from a page screenshot
def get_geetest_image(self, name='captcha.png'):
    """Crop the CAPTCHA region out of a full-page screenshot and return it.

    Args:
        name: File name for the capture. It is accepted for API
            compatibility but is not used by this method.

    Returns:
        The cropped region of the screenshot covering the CAPTCHA image.
    """
    # (top, bottom, left, right) edges of the CAPTCHA image on the page.
    top, bottom, left, right = self.get_position()
    print('驗證碼位置', top, bottom, left, right)
    screenshot = self.get_screenshot()
    # crop() takes its box as (left, upper, right, lower).
    captcha = screenshot.crop((left, top, right, bottom))
    # Fixed: the cropped image was never returned, so callers always got None.
    return captcha
# Locate the slider (clicking it is what reveals the second image, the one with the gap)
def get_slider(self):
    """Wait until the element with class ``geetest_slider_button`` is clickable and return it."""
    locator = (By.CLASS_NAME, 'geetest_slider_button')
    return self.wait.until(EC.element_to_be_clickable(locator))
# After the click the gap appears (the original comment says "接口", presumably a typo for "缺口", i.e. the gap).
# NOTE(review): this fragment uses `self`, so it presumably belongs inside a CrackGeetest method — confirm.
slider = self.get_slider()
slider.click()
# Then call get_geetest_image() again to capture the second image; the two captures are named img1 and img2
'''
對比圖像的缺口,需要遍歷圖片的每一個坐標點,獲取兩張圖片對應像素點的RGB數據,如果差距在一定范圍內,則代表兩個像素相同,接着繼續對比下一個像素點。如果差距在一定范圍之外,則說明不是相同的像素點,則該位置就是缺口位置
'''
def is_pixel_equal(self, img1, img2, x, y):
    """Tell whether the pixel at (x, y) is approximately the same in both images.

    The first three channels of each pixel are compared. The pixels count as
    equal only when every channel differs by less than the threshold of 60.

    Returns:
        True when all three channel differences are below the threshold,
        False otherwise.
    """
    threshold = 60
    # Read the same coordinate from each image.
    first = img1.load()[x, y]
    second = img2.load()[x, y]
    return all(abs(first[channel] - second[channel]) < threshold for channel in range(3))
def get_gap(self, img1, img2):
    """Return the x offset of the first column where the two images differ.

    Columns are scanned left to right starting at x = 60, and each column is
    scanned top to bottom. The first coordinate that ``is_pixel_equal``
    reports as different marks the gap.

    Args:
        img1: First image; its ``size`` gives the scan extent (x range from
            ``size[0]``, y range from ``size[1]``).
        img2: Second image, compared pixel by pixel against ``img1``.

    Returns:
        The x coordinate of the first differing pixel, or 60 when no
        differing pixel is found.
    """
    left = 60  # starting column; everything to the left of it is skipped
    for i in range(left, img1.size[0]):
        for j in range(img1.size[1]):
            # Fixed: the images are two separate arguments (was ``img1.img2``).
            if not self.is_pixel_equal(img1, img2, i, j):
                return i
    return left
6.模擬拖動:
def get_track(self, distance):
    """Build the list of per-step offsets used to drag the slider.

    The motion accelerates (a = 2) until four fifths of ``distance`` has been
    covered and decelerates (a = -3) afterwards, using a time step of 0.2.

    Args:
        distance: Total offset to cover.

    Returns:
        A list of rounded per-step displacements. It is empty when
        ``distance`` is not positive.
    """
    # Fixed: ``self`` and ``distance`` are now parameters; the original
    # signature took none and read an undefined ``distance``.
    track = []
    current = 0              # displacement covered so far
    mid = distance * 4 / 5   # switch-over point from acceleration to deceleration
    t = 0.2                  # time step
    v = 0                    # current velocity
    while current < distance:
        a = 2 if current < mid else -3
        v0 = v
        v = v0 + a * t
        # Fixed: ``^`` is bitwise XOR in Python (a TypeError on floats);
        # squaring the time step requires ``**``.
        move = v0 * t + 0.5 * a * t ** 2
        current += move
        track.append(round(move))
    return track
def move_to_gap(self, slider, tracks):
    """Drag the slider along the given track and release it.

    Args:
        slider: The slider element to press and hold.
        tracks: Iterable of horizontal offsets, applied one after another.
    """
    ActionChains(self.browser).click_and_hold(slider).perform()
    for step in tracks:
        # Each step is a purely horizontal move relative to the current pointer position.
        ActionChains(self.browser).move_by_offset(xoffset=step, yoffset=0).perform()
    # NOTE(review): the source lost its indentation; the pause is taken to come
    # once after the drag, before releasing — confirm.
    time.sleep(0.3)
    ActionChains(self.browser).release().perform()
三、點觸驗證碼識別
1.和12306的驗證碼類似
2.思路:
文字識別、圖像識別
3.使用超級鷹平台識別
修改Python API
import requests
from hashlib import md5
# Fixed: the base class was the undefined name ``obj``; it is now ``object``.
class Chaojiying(object):
    """Small HTTP client for the Chaojiying CAPTCHA-recognition service."""

    def __init__(self, username, password, soft_id):
        """Store the credentials and prepare the parameters shared by every request.

        Args:
            username: Account user name, sent as ``user``.
            password: Plain-text password; only its MD5 hex digest is stored
                and sent (as ``pass2``).
            soft_id: Software ID, sent as ``softid``.
        """
        self.username = username
        self.password = md5(password.encode('utf-8')).hexdigest()
        self.soft_id = soft_id
        # Form fields merged into every request.
        self.base_params = {
            'user': self.username,
            'pass2': self.password,
            'softid': self.soft_id,
        }
        self.headers = {
            'Connection': 'Keep-Alive',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
        }

    def post_pic(self, im, codetype):
        """Upload an image for recognition and return the decoded JSON response.

        Args:
            im: Image content, uploaded as the ``userfile`` form file.
            codetype: CAPTCHA type code, sent as ``codetype``.
        """
        params = {
            'codetype': codetype,
        }
        params.update(self.base_params)
        files = {'userfile': ('test.jpg', im)}
        r = requests.post('http://upload.chaojiying.net/Upload/Processing.php', data=params, files=files, headers=self.headers)
        return r.json()

    def report_error(self, im_id):
        """Report the image with the given id to the ReportError endpoint and return the JSON response.

        Args:
            im_id: Image identifier, sent as ``id``.
        """
        params = {'id': im_id, }
        params.update(self.base_params)
        r = requests.post('http://upload.chaojiying.net/Upload/ReportError.php', data=params, headers=self.headers)
        return r.json()
4.初始化:
EMAIL = 'test@test.com'
PASSWORD = ''
CHAOJIYING_USERNAME = 'test'
CHAOJIYING_PASSWORD = ''
CHAOJIYING_SOFT_ID = 893590  # software ID
CHAOJIYING_KIND = 9102  # CAPTCHA type code


class CrackTouClick():
    """Browser session, login credentials and Chaojiying client for a touch-click CAPTCHA."""

    def __init__(self):
        """Start a Chrome WebDriver and create the Chaojiying client."""
        self.url = '輸入要識別的網站'
        # Fixed: the driver class is ``webdriver.Chrome`` (was misspelled ``Chome``).
        self.browser = webdriver.Chrome()
        # Explicit-wait helper bound to this browser, constructed with a timeout of 20.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD
        # Fixed: Chaojiying.__init__ takes (username, password, soft_id); the
        # extra CHAOJIYING_KIND argument raised TypeError. The kind is passed
        # per request to post_pic() instead.
        self.chaojiying = Chaojiying(CHAOJIYING_USERNAME, CHAOJIYING_PASSWORD, CHAOJIYING_SOFT_ID)
5.獲取驗證碼:
def open(self):
    """Load the target page and fill in the email and password fields."""
    # Fixed: the method was declared without ``self`` although its body uses it.
    self.browser.get(self.url)
    # Fixed: the expected condition is ``presence_of_element_located``
    # (was misspelled ``persence_of_element_located``).
    email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
    password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
    # Fixed: the original typed the password into the email field and never
    # filled the password field.
    email.send_keys(self.email)
    password.send_keys(self.password)
def get_touclick_button(self):
    """Wait until the element with class ``touclick-hod-wrap`` is clickable and return it."""
    locator = (By.CLASS_NAME, 'touclick-hod-wrap')
    return self.wait.until(EC.element_to_be_clickable(locator))
def get_touclick_element(self):
    """Wait until the element with class ``touclick-pub-content`` is present and return it."""
    # Fixed: the expected condition is ``presence_of_element_located``
    # (was misspelled ``persence_of_element_located``).
    element = self.wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'touclick-pub-content')))
    return element
def get_position(self):
    """Return the page coordinates of the touch-click CAPTCHA element.

    Returns:
        A ``(top, bottom, left, right)`` tuple computed from the element's
        ``y``/``x`` location and its ``height``/``width``.
    """
    element = self.get_touclick_element()
    time.sleep(1)  # fixed pause before reading the element's geometry
    origin, extent = element.location, element.size
    top = origin['y']
    left = origin['x']
    bottom = top + extent['height']
    right = left + extent['width']
    return (top, bottom, left, right)
def get_screenshot(self):
    """Take a screenshot of the current page and return it opened as an image."""
    png_bytes = self.browser.get_screenshot_as_png()
    # Wrap the raw PNG bytes in a file-like object so Image.open can read them.
    return Image.open(BytesIO(png_bytes))
# Fixed: the ``def`` line was missing its trailing colon (a syntax error).
def get_touclick_image(self, name='captcha.png'):
    """Crop the CAPTCHA region out of a full-page screenshot and return it.

    Args:
        name: File name for the capture. It is accepted for API
            compatibility but is not used by this method.

    Returns:
        The cropped region of the screenshot covering the CAPTCHA element.
    """
    top, bottom, left, right = self.get_position()
    print('驗證碼位置', top, bottom, left, right)
    screenshot = self.get_screenshot()
    # crop() takes its box as (left, upper, right, lower).
    captcha = screenshot.crop((left, top, right, bottom))
    return captcha
6.識別驗證碼:
# Capture the CAPTCHA, serialize it to PNG bytes in memory and submit it for recognition.
# NOTE(review): this fragment uses `self`, so it presumably belongs inside a CrackTouClick method — confirm.
image = self.get_touclick_image()
bytes_array = BytesIO()
image.save(bytes_array, format='PNG')
# Fixed: ``bytes_array,getvalue()`` (comma) passed the buffer plus the result of
# an undefined ``getvalue`` function; the intended call is the method
# ``bytes_array.getvalue()``, which yields the PNG bytes.
res = self.chaojiying.post_pic(bytes_array.getvalue(), CHAOJIYING_KIND)
print(res)
def get_points(self, captcha_result):
    """Parse the recognition result into a list of integer coordinate lists.

    The ``pic_str`` entry of ``captcha_result`` is split on ``|`` into
    groups, and each group is split on ``,`` into integers.

    Returns:
        A list with one list of ints per ``|``-separated group.
    """
    raw = captcha_result.get('pic_str')
    locations = []
    for group in raw.split('|'):
        locations.append(list(map(int, group.split(','))))
    return locations
def touch_click_words(self, locations):
    """Click each given location inside the touch-click CAPTCHA element.

    Args:
        locations: Iterable of pairs; items 0 and 1 of each pair are used as
            the offsets passed to ``move_to_element_with_offset``.
    """
    for point in locations:
        print(point)
        x_offset, y_offset = point[0], point[1]
        target = self.get_touclick_element()
        ActionChains(self.browser).move_to_element_with_offset(target, x_offset, y_offset).click().perform()
        # NOTE(review): the source lost its indentation; the pause is taken to
        # follow every click — confirm.
        time.sleep(1)
