
一、前言
爬蟲最大的敵人之一是什么?沒錯,驗證碼!Geetest作為提供驗證碼服務的行家,市場占有率還是蠻高的。遇到Geetest提供的滑動驗證碼怎么破?
一種方法是分析它的js加密方法,通過大量抓包分析找到它的返回參數,直接自動生成需要的參數即可,這種方法工程量大一些,並且官方js腳本一升級,就得重新分析,耗時耗力。
今天為大家介紹的一種方法是,通過Selenium模擬用戶滑動解鎖。這個方法的優勢在於簡單,方便更新。但是它的缺點也很明顯,速度慢,並且不能制作成api接口的形式。
授人以魚不如授人以漁,接下來就為大家呈現本教程的精彩內容。不過,在閱讀本篇文章之前,請確保你已經掌握網絡爬蟲基礎,如果不具備爬蟲基礎,請先到我的CSDN專欄學習,然后再來閱讀本文。我的專欄地址:點我查看
二、先睹為快

左側顯示的為自動識別過程,右邊是一些打印信息。
三、實戰分析
我們以國家企業信用信息公示系統為例,這是一個企業信息查詢的網站,每次查詢都需要進行一次驗證碼識別。它所使用的就是GEETEST驗證碼,它的URL:點我查看
這個網站是這個樣子的:

1、過程分析
要想把大象裝冰箱,總共分幾步?
那么,現在思考一個問題,通過Selenium模擬用戶滑動解鎖,總共分幾步?請停在這里,思考五分鍾,再繼續閱讀!
我們先公布一個粗略的答案:
- 使用Selenium打開頁面。
- 匹配到輸入框,輸入要查詢的信息,並點擊查詢按鈕。
- 讀取驗證碼圖片,並做缺口識別。
- 根據缺口位置,計算滑動距離。
- 根據滑動距離,拖拽滑塊到需要匹配的位置。
其實,將每個步驟拆分開來一點一點實現並不難,接下來進入正文。
2、實戰開始
第一步:使用Selenium打開網頁,並輸入信息,點擊查詢按鈕。
這部分內容很簡單,Selenium基礎性的東西我不再講解,如有不懂,請看我專欄的Selenium相關內容。
編寫代碼如下:
# -*-coding:utf-8 -*-
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium import webdriver
class Crack():
    """
    First step of the tutorial: open the query page, type the search
    keyword and press the search button (which brings up the captcha).
    """

    def __init__(self, keyword, driver_path='D:\\chromedriver.exe'):
        """
        Start Chrome and remember what to search for.

        :param keyword: text typed into the site's search box
        :param driver_path: path to the chromedriver executable; the default
            keeps the location the original script hard-coded
        """
        self.url = 'http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml'
        self.browser = webdriver.Chrome(driver_path)
        # Shared explicit wait: every lookup below gives up after 100 seconds.
        self.wait = WebDriverWait(self.browser, 100)
        self.keyword = keyword

    def open(self):
        """
        Load the page, enter the keyword and click the search button.
        """
        self.browser.get(self.url)
        search_box = self.wait.until(
            EC.presence_of_element_located((By.ID, 'keyword_qycx')))
        search_button = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'btn')))
        search_box.send_keys(self.keyword)
        search_button.click()

    def crack(self):
        """
        Entry point; at this stage it only opens the page and submits the query.
        """
        self.open()
if __name__ == '__main__':
    # Script entry point: announce the run, then submit one sample query.
    print('開始驗證')
    solver = Crack(keyword=u'中國移動')
    solver.crack()

第二步:保存驗證碼圖片
我們審查元素找到圖片的地址,審查結果如下:

可以看到,圖片是很多圖片合成的,也就是說你只保存所有地址的圖片是不行的。它是通過background-position的方法進行合成的。每一個圖片是亂的,這個怎么搞?很簡單,抓取這些圖片的鏈接,然后根據鏈接的圖片,再合成這張沒有缺口的圖片,獲取缺口圖的方法也是如此,都是自己合成。
編寫代碼如下:
# -*-coding:utf-8 -*-
import time, random
import PIL.Image as image
from io import BytesIO
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import requests, json, re, urllib
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
class Crack():
    """
    Solve the Geetest slider captcha shown by the query page.

    Workflow implemented by :meth:`crack`: submit the query, download the two
    scrambled captcha images, re-assemble them, locate the gap by comparing
    the two pictures, build a list of horizontal steps and drag the slider.
    """

    # Patterns applied to the inline ``style`` attribute of every slice <div>.
    _URL_RE = re.compile(r'url\("(.*)"\);')
    _POSITION_RE = re.compile(r'background-position: (.*)px (.*)px;')

    def __init__(self, keyword, driver_path='D:\\chromedriver.exe'):
        """
        Start Chrome and remember what to search for.

        :param keyword: text typed into the site's search box
        :param driver_path: path to the chromedriver executable; the default
            keeps the location the original script hard-coded
        """
        self.url = 'http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml'
        self.browser = webdriver.Chrome(driver_path)
        # Shared explicit wait: every lookup below gives up after 100 seconds.
        self.wait = WebDriverWait(self.browser, 100)
        self.keyword = keyword
        # Subtracted from the detected gap x before the track is built
        # (presumably the slider knob's own left margin - confirm on the page).
        self.BORDER = 6

    def __del__(self):
        """
        Shut the browser down when the object is collected.
        """
        # __init__ may have failed before the browser existed; in that case
        # there is nothing to wait for or to close.
        browser = getattr(self, 'browser', None)
        if browser is None:
            return
        time.sleep(2)
        # quit() also ends the chromedriver process, close() only the window.
        browser.quit()

    def get_screenshot(self):
        """
        Take a screenshot of the current page.

        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_position(self):
        """
        Locate the captcha box on the page.

        :return: tuple ``(top, bottom, left, right)`` in page coordinates
        """
        # find_element(By...) works on Selenium 3 and 4; the
        # find_element_by_* shortcuts were removed in Selenium 4.3.
        box = self.browser.find_element(By.CLASS_NAME, 'gt_box')
        time.sleep(2)
        location = box.location
        size = box.size
        top = location['y']
        left = location['x']
        bottom = top + size['height']
        right = left + size['width']
        return (top, bottom, left, right)

    def get_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it.

        :param name: file the cropped captcha is written to
        :return: the cropped PIL image
        """
        top, bottom, left, right = self.get_position()
        print('驗證碼位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    @classmethod
    def _slice_url(cls, style):
        """Extract the background image URL from a slice style, as .jpg."""
        match = cls._URL_RE.search(style)
        if match is None:
            raise ValueError('no background url in style: %r' % (style,))
        return match.group(1).replace('webp', 'jpg')

    @classmethod
    def _slice_location(cls, style):
        """Extract the background-position of a slice as ``{'x': .., 'y': ..}``."""
        match = cls._POSITION_RE.search(style)
        if match is None:
            raise ValueError('no background-position in style: %r' % (style,))
        x, y = match.groups()
        return {'x': int(x), 'y': int(y)}

    def get_images(self, bg_filename='bg.jpg', fullbg_filename='fullbg.jpg'):
        """
        Download the two scrambled captcha images and read their slice layout.

        :param bg_filename: file the image with the gap is saved to
        :param fullbg_filename: file the complete image is saved to
        :return: ``(bg_location_list, fullbg_location_list)``, one
            ``{'x': int, 'y': int}`` dict per slice <div>
        :raises selenium.common.exceptions.TimeoutException: if the slices do
            not appear before ``self.wait`` times out
        """
        def find_slices(driver):
            soup = BeautifulSoup(driver.page_source, 'lxml')
            bg = soup.find_all('div', class_='gt_cut_bg_slice')
            fullbg = soup.find_all('div', class_='gt_cut_fullbg_slice')
            # Both sets are needed; keep polling while either is missing.
            return (bg, fullbg) if bg and fullbg else False

        bg_slices, fullbg_slices = self.wait.until(find_slices)
        # Every slice of one picture shares the same image; read the first.
        bg_url = self._slice_url(bg_slices[0].get('style'))
        fullbg_url = self._slice_url(fullbg_slices[0].get('style'))
        bg_location_list = [
            self._slice_location(each.get('style')) for each in bg_slices]
        fullbg_location_list = [
            self._slice_location(each.get('style')) for each in fullbg_slices]
        urlretrieve(url=bg_url, filename=bg_filename)
        print('缺口圖片下載完成')
        urlretrieve(url=fullbg_url, filename=fullbg_filename)
        print('背景圖片下載完成')
        return bg_location_list, fullbg_location_list

    def get_merge_image(self, filename, location_list):
        """
        Re-assemble a scrambled captcha image and overwrite the file with it.

        :param filename: scrambled image on disk; replaced by the merged one
        :param location_list: slice positions returned by :meth:`get_images`
        :return: the merged 260x116 PIL image
        """
        source = image.open(filename)
        upper_strips = []
        lower_strips = []
        for location in location_list:
            x = abs(location['x'])
            # Each slice is a 10 px wide strip taken from one of two 58 px
            # rows of the source; y == -58 selects source rows 58..116.
            if location['y'] == -58:
                upper_strips.append(source.crop((x, 58, x + 10, 116)))
            if location['y'] == 0:
                lower_strips.append(source.crop((x, 0, x + 10, 58)))
        merged = image.new('RGB', (260, 116))
        for row_top, strips in ((0, upper_strips), (58, lower_strips)):
            x_offset = 0
            for strip in strips:
                merged.paste(strip, (x_offset, row_top))
                x_offset += strip.size[0]
        merged.save(filename)
        return merged

    def open(self):
        """
        Load the page, enter the keyword and click the search button.
        """
        self.browser.get(self.url)
        search_box = self.wait.until(
            EC.presence_of_element_located((By.ID, 'keyword_qycx')))
        search_button = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'btn')))
        search_box.send_keys(self.keyword)
        search_button.click()

    def get_slider(self):
        """
        Wait for the slider knob and return it.

        :return: the slider knob element
        :raises selenium.common.exceptions.TimeoutException: if the knob does
            not appear before ``self.wait`` times out
        """
        return self.wait.until(EC.presence_of_element_located(
            (By.XPATH, "//div[@class='gt_slider_knob gt_show']")))

    def get_gap(self, img1, img2):
        """
        Find the x coordinate where the two images first differ.

        :param img1: image without the gap
        :param img2: image with the gap
        :return: x of the first differing column, scanning left to right from
            x = 43; 43 itself when no difference is found
        """
        # Columns left of 43 are skipped (presumably the area covered by the
        # slider piece at its start position - confirm).
        left = 43
        width, height = img1.size[0], img1.size[1]
        for x in range(left, width):
            for y in range(height):
                if not self.is_pixel_equal(img1, img2, x, y):
                    return x
        return left

    def is_pixel_equal(self, img1, img2, x, y):
        """
        Tell whether two images have (nearly) the same colour at one pixel.

        :param img1: first image
        :param img2: second image
        :param x: pixel x
        :param y: pixel y
        :return: True when every RGB channel differs by less than 60
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        # abs() must wrap the difference only, so that a pixel that is much
        # brighter in img2 is detected as well as one that is much darker.
        return all(
            abs(pix1[channel] - pix2[channel]) < threshold
            for channel in range(3))

    def get_track(self, distance):
        """
        Split a horizontal distance into per-step pixel offsets.

        The motion accelerates (a = 2) until 4/5 of the distance is covered
        and decelerates (a = -3) afterwards, sampled every t = 0.2.

        :param distance: total offset to move, in pixels
        :return: list of non-negative integer steps whose sum equals
            ``round(distance)``; empty when ``distance`` is not positive
        """
        track = []
        current = 0     # exact displacement so far
        emitted = 0     # whole pixels already handed out
        mid = distance * 4 / 5
        t = 0.2
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            v = v0 + a * t
            # x = v0*t + 1/2*a*t^2, clamped so the last step cannot overshoot.
            current = min(current + v0 * t + 1 / 2 * a * t * t, distance)
            # Round the running total instead of each step, otherwise the
            # rounding errors pile up and the steps miss the target.
            step = round(current) - emitted
            emitted += step
            track.append(step)
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider by replaying the steps of ``track``.

        :param slider: slider knob element
        :param track: list of horizontal offsets; left unmodified
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        # The steps are replayed in random order, as the original did.
        # NOTE(review): this discards the accelerate/decelerate ordering
        # produced by get_track - confirm that is intended.
        steps = list(track)
        random.shuffle(steps)
        for step in steps:
            ActionChains(self.browser).move_by_offset(
                xoffset=step, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def crack(self):
        """
        Run the whole workflow once: query, fetch images, find gap, drag.
        """
        self.open()
        bg_filename = 'bg.jpg'
        fullbg_filename = 'fullbg.jpg'
        bg_location_list, fullbg_location_list = self.get_images(
            bg_filename, fullbg_filename)
        # Undo the slice scrambling of both pictures.
        bg_img = self.get_merge_image(bg_filename, bg_location_list)
        fullbg_img = self.get_merge_image(fullbg_filename, fullbg_location_list)
        slider = self.get_slider()
        gap = self.get_gap(fullbg_img, bg_img)
        print('缺口位置', gap)
        track = self.get_track(gap - self.BORDER)
        print('滑動滑塊')
        print(track)
        self.move_to_gap(slider, track)
if __name__ == '__main__':
    # Script entry point: run the full captcha workflow for one sample query.
    print('開始驗證')
    solver = Crack(keyword=u'中國移動')
    solver.crack()
    print('驗證成功')
運行效果如下:

可以看到,運行之后,我們已經順利生成了兩張圖片,一個是缺口圖,另一個是非缺口圖。
第三步:計算缺口距離
根據缺口圖和非缺口圖,通過比對圖像的像素點的大小區別,找到缺口位置。
編寫代碼如下:
# -*-coding:utf-8 -*-
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from urllib.request import urlretrieve
from selenium import webdriver
from bs4 import BeautifulSoup
import PIL.Image as image
import re
class Crack():
    """
    Third step of the tutorial: submit the query, download and re-assemble
    the two captcha images and report the x position of the gap.
    """

    # Patterns applied to the inline ``style`` attribute of every slice <div>.
    _URL_RE = re.compile(r'url\("(.*)"\);')
    _POSITION_RE = re.compile(r'background-position: (.*)px (.*)px;')

    def __init__(self, keyword, driver_path='D:\\chromedriver.exe'):
        """
        Start Chrome and remember what to search for.

        :param keyword: text typed into the site's search box
        :param driver_path: path to the chromedriver executable; the default
            keeps the location the original script hard-coded
        """
        self.url = 'http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml'
        self.browser = webdriver.Chrome(driver_path)
        # Shared explicit wait: every lookup below gives up after 100 seconds.
        self.wait = WebDriverWait(self.browser, 100)
        self.keyword = keyword

    def open(self):
        """
        Load the page, enter the keyword and click the search button.
        """
        self.browser.get(self.url)
        search_box = self.wait.until(
            EC.presence_of_element_located((By.ID, 'keyword_qycx')))
        search_button = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'btn')))
        search_box.send_keys(self.keyword)
        search_button.click()

    @classmethod
    def _slice_url(cls, style):
        """Extract the background image URL from a slice style, as .jpg."""
        match = cls._URL_RE.search(style)
        if match is None:
            raise ValueError('no background url in style: %r' % (style,))
        return match.group(1).replace('webp', 'jpg')

    @classmethod
    def _slice_location(cls, style):
        """Extract the background-position of a slice as ``{'x': .., 'y': ..}``."""
        match = cls._POSITION_RE.search(style)
        if match is None:
            raise ValueError('no background-position in style: %r' % (style,))
        x, y = match.groups()
        return {'x': int(x), 'y': int(y)}

    def get_images(self, bg_filename='bg.jpg', fullbg_filename='fullbg.jpg'):
        """
        Download the two scrambled captcha images and read their slice layout.

        :param bg_filename: file the image with the gap is saved to
        :param fullbg_filename: file the complete image is saved to
        :return: ``(bg_location_list, fullbg_location_list)``, one
            ``{'x': int, 'y': int}`` dict per slice <div>
        :raises selenium.common.exceptions.TimeoutException: if the slices do
            not appear before ``self.wait`` times out
        """
        def find_slices(driver):
            soup = BeautifulSoup(driver.page_source, 'lxml')
            bg = soup.find_all('div', class_='gt_cut_bg_slice')
            fullbg = soup.find_all('div', class_='gt_cut_fullbg_slice')
            # Both sets are needed; keep polling while either is missing.
            return (bg, fullbg) if bg and fullbg else False

        bg_slices, fullbg_slices = self.wait.until(find_slices)
        # Every slice of one picture shares the same image; read the first.
        bg_url = self._slice_url(bg_slices[0].get('style'))
        fullbg_url = self._slice_url(fullbg_slices[0].get('style'))
        bg_location_list = [
            self._slice_location(each.get('style')) for each in bg_slices]
        fullbg_location_list = [
            self._slice_location(each.get('style')) for each in fullbg_slices]
        urlretrieve(url=bg_url, filename=bg_filename)
        print('缺口圖片下載完成')
        urlretrieve(url=fullbg_url, filename=fullbg_filename)
        print('背景圖片下載完成')
        return bg_location_list, fullbg_location_list

    def get_merge_image(self, filename, location_list):
        """
        Re-assemble a scrambled captcha image and overwrite the file with it.

        :param filename: scrambled image on disk; replaced by the merged one
        :param location_list: slice positions returned by :meth:`get_images`
        :return: the merged 260x116 PIL image
        """
        source = image.open(filename)
        upper_strips = []
        lower_strips = []
        for location in location_list:
            x = abs(location['x'])
            # Each slice is a 10 px wide strip taken from one of two 58 px
            # rows of the source; y == -58 selects source rows 58..116.
            if location['y'] == -58:
                upper_strips.append(source.crop((x, 58, x + 10, 116)))
            if location['y'] == 0:
                lower_strips.append(source.crop((x, 0, x + 10, 58)))
        merged = image.new('RGB', (260, 116))
        for row_top, strips in ((0, upper_strips), (58, lower_strips)):
            x_offset = 0
            for strip in strips:
                merged.paste(strip, (x_offset, row_top))
                x_offset += strip.size[0]
        merged.save(filename)
        return merged

    def is_pixel_equal(self, img1, img2, x, y):
        """
        Tell whether two images have (nearly) the same colour at one pixel.

        :param img1: first image
        :param img2: second image
        :param x: pixel x
        :param y: pixel y
        :return: True when every RGB channel differs by less than 60
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        return all(
            abs(pix1[channel] - pix2[channel]) < threshold
            for channel in range(3))

    def get_gap(self, img1, img2):
        """
        Find the x coordinate where the two images first differ.

        :param img1: image without the gap
        :param img2: image with the gap
        :return: x of the first differing column, scanning left to right from
            x = 43; 43 itself when no difference is found
        """
        # Columns left of 43 are skipped (presumably the area covered by the
        # slider piece at its start position - confirm).
        left = 43
        width, height = img1.size[0], img1.size[1]
        for x in range(left, width):
            for y in range(height):
                if not self.is_pixel_equal(img1, img2, x, y):
                    return x
        return left

    def crack(self):
        """
        Submit the query, rebuild both captcha images and print the gap x.
        """
        self.open()
        bg_filename = 'bg.jpg'
        fullbg_filename = 'fullbg.jpg'
        bg_location_list, fullbg_location_list = self.get_images(
            bg_filename, fullbg_filename)
        # Undo the slice scrambling of both pictures.
        bg_img = self.get_merge_image(bg_filename, bg_location_list)
        fullbg_img = self.get_merge_image(fullbg_filename, fullbg_location_list)
        gap = self.get_gap(fullbg_img, bg_img)
        print('缺口位置', gap)
if __name__ == '__main__':
    # Script entry point: locate the captcha gap for one sample query.
    print('開始驗證')
    solver = Crack(keyword=u'中國移動')
    solver.crack()
運行結果如下:

這樣我們就計算出了缺口位置,接下來就是根據缺口位置,滑動滑塊到相應位置。
第四步:計算滑動軌跡
我們可以使用瞬間移動,直接在1s內移動到目標位置,結果就是“被吃了”。

勻速直線運動,勻速直線運動大法好!果不其然,還是“被吃了”,繼續嘗試。

模仿抖抖病患者運動,顫顫巍巍,如履薄冰,估計geetest服務器認為是我外婆在操作吧。

雖然這個方法偶爾會成功,但成功率極低。最好的方法是什么呢?
模擬人的運動!你想一下,人在滑動滑塊的初期是不是速度快,但是當要接近缺口位置的時候,會減速,因為我得對准缺口位置啊!這怎么實現呢?使用我們初中學過的物理知識:
當前速度公式為:
v = v0 + a * t
其中,v是當前速度,v0是初始速度,a是加速度,t是時間。我們剛開始讓加速度為正(加速),當滑過總距離的4/5之后,再把加速度改為負值(減速)。使用這個移動過程,移動滑塊到缺口位置。
編寫代碼如下:
# -*-coding:utf-8 -*-
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from urllib.request import urlretrieve
from selenium import webdriver
from bs4 import BeautifulSoup
import PIL.Image as image
import re
class Crack():
    """
    Fourth step of the tutorial: as before, locate the captcha gap, then
    turn the distance into a list of slider steps and print it.
    """

    # Patterns applied to the inline ``style`` attribute of every slice <div>.
    _URL_RE = re.compile(r'url\("(.*)"\);')
    _POSITION_RE = re.compile(r'background-position: (.*)px (.*)px;')

    def __init__(self, keyword, driver_path='D:\\chromedriver.exe'):
        """
        Start Chrome and remember what to search for.

        :param keyword: text typed into the site's search box
        :param driver_path: path to the chromedriver executable; the default
            keeps the location the original script hard-coded
        """
        self.url = 'http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml'
        self.browser = webdriver.Chrome(driver_path)
        # Shared explicit wait: every lookup below gives up after 100 seconds.
        self.wait = WebDriverWait(self.browser, 100)
        self.keyword = keyword
        # Subtracted from the detected gap x before the track is built
        # (presumably the slider knob's own left margin - confirm on the page).
        self.BORDER = 6

    def open(self):
        """
        Load the page, enter the keyword and click the search button.
        """
        self.browser.get(self.url)
        search_box = self.wait.until(
            EC.presence_of_element_located((By.ID, 'keyword_qycx')))
        search_button = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'btn')))
        search_box.send_keys(self.keyword)
        search_button.click()

    @classmethod
    def _slice_url(cls, style):
        """Extract the background image URL from a slice style, as .jpg."""
        match = cls._URL_RE.search(style)
        if match is None:
            raise ValueError('no background url in style: %r' % (style,))
        return match.group(1).replace('webp', 'jpg')

    @classmethod
    def _slice_location(cls, style):
        """Extract the background-position of a slice as ``{'x': .., 'y': ..}``."""
        match = cls._POSITION_RE.search(style)
        if match is None:
            raise ValueError('no background-position in style: %r' % (style,))
        x, y = match.groups()
        return {'x': int(x), 'y': int(y)}

    def get_images(self, bg_filename='bg.jpg', fullbg_filename='fullbg.jpg'):
        """
        Download the two scrambled captcha images and read their slice layout.

        :param bg_filename: file the image with the gap is saved to
        :param fullbg_filename: file the complete image is saved to
        :return: ``(bg_location_list, fullbg_location_list)``, one
            ``{'x': int, 'y': int}`` dict per slice <div>
        :raises selenium.common.exceptions.TimeoutException: if the slices do
            not appear before ``self.wait`` times out
        """
        def find_slices(driver):
            soup = BeautifulSoup(driver.page_source, 'lxml')
            bg = soup.find_all('div', class_='gt_cut_bg_slice')
            fullbg = soup.find_all('div', class_='gt_cut_fullbg_slice')
            # Both sets are needed; keep polling while either is missing.
            return (bg, fullbg) if bg and fullbg else False

        bg_slices, fullbg_slices = self.wait.until(find_slices)
        # Every slice of one picture shares the same image; read the first.
        bg_url = self._slice_url(bg_slices[0].get('style'))
        fullbg_url = self._slice_url(fullbg_slices[0].get('style'))
        bg_location_list = [
            self._slice_location(each.get('style')) for each in bg_slices]
        fullbg_location_list = [
            self._slice_location(each.get('style')) for each in fullbg_slices]
        urlretrieve(url=bg_url, filename=bg_filename)
        print('缺口圖片下載完成')
        urlretrieve(url=fullbg_url, filename=fullbg_filename)
        print('背景圖片下載完成')
        return bg_location_list, fullbg_location_list

    def get_merge_image(self, filename, location_list):
        """
        Re-assemble a scrambled captcha image and overwrite the file with it.

        :param filename: scrambled image on disk; replaced by the merged one
        :param location_list: slice positions returned by :meth:`get_images`
        :return: the merged 260x116 PIL image
        """
        source = image.open(filename)
        upper_strips = []
        lower_strips = []
        for location in location_list:
            x = abs(location['x'])
            # Each slice is a 10 px wide strip taken from one of two 58 px
            # rows of the source; y == -58 selects source rows 58..116.
            if location['y'] == -58:
                upper_strips.append(source.crop((x, 58, x + 10, 116)))
            if location['y'] == 0:
                lower_strips.append(source.crop((x, 0, x + 10, 58)))
        merged = image.new('RGB', (260, 116))
        for row_top, strips in ((0, upper_strips), (58, lower_strips)):
            x_offset = 0
            for strip in strips:
                merged.paste(strip, (x_offset, row_top))
                x_offset += strip.size[0]
        merged.save(filename)
        return merged

    def is_pixel_equal(self, img1, img2, x, y):
        """
        Tell whether two images have (nearly) the same colour at one pixel.

        :param img1: first image
        :param img2: second image
        :param x: pixel x
        :param y: pixel y
        :return: True when every RGB channel differs by less than 60
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        # abs() must wrap the difference only, so that a pixel that is much
        # brighter in img2 is detected as well as one that is much darker.
        return all(
            abs(pix1[channel] - pix2[channel]) < threshold
            for channel in range(3))

    def get_gap(self, img1, img2):
        """
        Find the x coordinate where the two images first differ.

        :param img1: image without the gap
        :param img2: image with the gap
        :return: x of the first differing column, scanning left to right from
            x = 43; 43 itself when no difference is found
        """
        # Columns left of 43 are skipped (presumably the area covered by the
        # slider piece at its start position - confirm).
        left = 43
        width, height = img1.size[0], img1.size[1]
        for x in range(left, width):
            for y in range(height):
                if not self.is_pixel_equal(img1, img2, x, y):
                    return x
        return left

    def get_track(self, distance):
        """
        Split a horizontal distance into per-step pixel offsets.

        The motion accelerates (a = 2) until 4/5 of the distance is covered
        and decelerates (a = -3) afterwards, sampled every t = 0.2.

        :param distance: total offset to move, in pixels
        :return: list of non-negative integer steps whose sum equals
            ``round(distance)``; empty when ``distance`` is not positive
        """
        track = []
        current = 0     # exact displacement so far
        emitted = 0     # whole pixels already handed out
        mid = distance * 4 / 5
        t = 0.2
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            v = v0 + a * t
            # x = v0*t + 1/2*a*t^2, clamped so the last step cannot overshoot.
            current = min(current + v0 * t + 1 / 2 * a * t * t, distance)
            # Round the running total instead of each step, otherwise the
            # rounding errors pile up and the steps miss the target.
            step = round(current) - emitted
            emitted += step
            track.append(step)
        return track

    def crack(self):
        """
        Submit the query, locate the gap and print the slider track.
        """
        self.open()
        bg_filename = 'bg.jpg'
        fullbg_filename = 'fullbg.jpg'
        bg_location_list, fullbg_location_list = self.get_images(
            bg_filename, fullbg_filename)
        # Undo the slice scrambling of both pictures.
        bg_img = self.get_merge_image(bg_filename, bg_location_list)
        fullbg_img = self.get_merge_image(fullbg_filename, fullbg_location_list)
        gap = self.get_gap(fullbg_img, bg_img)
        print('缺口位置', gap)
        track = self.get_track(gap - self.BORDER)
        print('滑動滑塊')
        print(track)
if __name__ == '__main__':
    # Script entry point: compute the slider track for one sample query.
    print('開始驗證')
    solver = Crack(keyword=u'中國移動')
    solver.crack()
運行效果如下:

第五步:移動滑塊
根據返回的每次滑動的距離,我們移動滑塊至缺口位置。
編寫代碼如下:
# -*-coding:utf-8 -*-
import random
import re
import time
from urllib.request import urlretrieve

import PIL.Image as image
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class Crack():
    """
    Final step of the tutorial: locate the captcha gap, build the slider
    track and drag the slider knob to the gap.
    """

    # Patterns applied to the inline ``style`` attribute of every slice <div>.
    _URL_RE = re.compile(r'url\("(.*)"\);')
    _POSITION_RE = re.compile(r'background-position: (.*)px (.*)px;')

    def __init__(self, keyword, driver_path='D:\\chromedriver.exe'):
        """
        Start Chrome and remember what to search for.

        :param keyword: text typed into the site's search box
        :param driver_path: path to the chromedriver executable; the default
            keeps the location the original script hard-coded
        """
        self.url = 'http://bj.gsxt.gov.cn/sydq/loginSydqAction!sydq.dhtml'
        self.browser = webdriver.Chrome(driver_path)
        # Shared explicit wait: every lookup below gives up after 100 seconds.
        self.wait = WebDriverWait(self.browser, 100)
        self.keyword = keyword
        # Subtracted from the detected gap x before the track is built
        # (presumably the slider knob's own left margin - confirm on the page).
        self.BORDER = 6

    def open(self):
        """
        Load the page, enter the keyword and click the search button.
        """
        self.browser.get(self.url)
        search_box = self.wait.until(
            EC.presence_of_element_located((By.ID, 'keyword_qycx')))
        search_button = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'btn')))
        search_box.send_keys(self.keyword)
        search_button.click()

    @classmethod
    def _slice_url(cls, style):
        """Extract the background image URL from a slice style, as .jpg."""
        match = cls._URL_RE.search(style)
        if match is None:
            raise ValueError('no background url in style: %r' % (style,))
        return match.group(1).replace('webp', 'jpg')

    @classmethod
    def _slice_location(cls, style):
        """Extract the background-position of a slice as ``{'x': .., 'y': ..}``."""
        match = cls._POSITION_RE.search(style)
        if match is None:
            raise ValueError('no background-position in style: %r' % (style,))
        x, y = match.groups()
        return {'x': int(x), 'y': int(y)}

    def get_images(self, bg_filename='bg.jpg', fullbg_filename='fullbg.jpg'):
        """
        Download the two scrambled captcha images and read their slice layout.

        :param bg_filename: file the image with the gap is saved to
        :param fullbg_filename: file the complete image is saved to
        :return: ``(bg_location_list, fullbg_location_list)``, one
            ``{'x': int, 'y': int}`` dict per slice <div>
        :raises selenium.common.exceptions.TimeoutException: if the slices do
            not appear before ``self.wait`` times out
        """
        def find_slices(driver):
            soup = BeautifulSoup(driver.page_source, 'lxml')
            bg = soup.find_all('div', class_='gt_cut_bg_slice')
            fullbg = soup.find_all('div', class_='gt_cut_fullbg_slice')
            # Both sets are needed; keep polling while either is missing.
            return (bg, fullbg) if bg and fullbg else False

        bg_slices, fullbg_slices = self.wait.until(find_slices)
        # Every slice of one picture shares the same image; read the first.
        bg_url = self._slice_url(bg_slices[0].get('style'))
        fullbg_url = self._slice_url(fullbg_slices[0].get('style'))
        bg_location_list = [
            self._slice_location(each.get('style')) for each in bg_slices]
        fullbg_location_list = [
            self._slice_location(each.get('style')) for each in fullbg_slices]
        urlretrieve(url=bg_url, filename=bg_filename)
        print('缺口圖片下載完成')
        urlretrieve(url=fullbg_url, filename=fullbg_filename)
        print('背景圖片下載完成')
        return bg_location_list, fullbg_location_list

    def get_merge_image(self, filename, location_list):
        """
        Re-assemble a scrambled captcha image and overwrite the file with it.

        :param filename: scrambled image on disk; replaced by the merged one
        :param location_list: slice positions returned by :meth:`get_images`
        :return: the merged 260x116 PIL image
        """
        source = image.open(filename)
        upper_strips = []
        lower_strips = []
        for location in location_list:
            x = abs(location['x'])
            # Each slice is a 10 px wide strip taken from one of two 58 px
            # rows of the source; y == -58 selects source rows 58..116.
            if location['y'] == -58:
                upper_strips.append(source.crop((x, 58, x + 10, 116)))
            if location['y'] == 0:
                lower_strips.append(source.crop((x, 0, x + 10, 58)))
        merged = image.new('RGB', (260, 116))
        for row_top, strips in ((0, upper_strips), (58, lower_strips)):
            x_offset = 0
            for strip in strips:
                merged.paste(strip, (x_offset, row_top))
                x_offset += strip.size[0]
        merged.save(filename)
        return merged

    def is_pixel_equal(self, img1, img2, x, y):
        """
        Tell whether two images have (nearly) the same colour at one pixel.

        :param img1: first image
        :param img2: second image
        :param x: pixel x
        :param y: pixel y
        :return: True when every RGB channel differs by less than 60
        """
        pix1 = img1.load()[x, y]
        pix2 = img2.load()[x, y]
        threshold = 60
        # abs() must wrap the difference only, so that a pixel that is much
        # brighter in img2 is detected as well as one that is much darker.
        return all(
            abs(pix1[channel] - pix2[channel]) < threshold
            for channel in range(3))

    def get_gap(self, img1, img2):
        """
        Find the x coordinate where the two images first differ.

        :param img1: image without the gap
        :param img2: image with the gap
        :return: x of the first differing column, scanning left to right from
            x = 43; 43 itself when no difference is found
        """
        # Columns left of 43 are skipped (presumably the area covered by the
        # slider piece at its start position - confirm).
        left = 43
        width, height = img1.size[0], img1.size[1]
        for x in range(left, width):
            for y in range(height):
                if not self.is_pixel_equal(img1, img2, x, y):
                    return x
        return left

    def get_track(self, distance):
        """
        Split a horizontal distance into per-step pixel offsets.

        The motion accelerates (a = 2) until 4/5 of the distance is covered
        and decelerates (a = -3) afterwards, sampled every t = 0.2.

        :param distance: total offset to move, in pixels
        :return: list of non-negative integer steps whose sum equals
            ``round(distance)``; empty when ``distance`` is not positive
        """
        track = []
        current = 0     # exact displacement so far
        emitted = 0     # whole pixels already handed out
        mid = distance * 4 / 5
        t = 0.2
        v = 0
        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            v = v0 + a * t
            # x = v0*t + 1/2*a*t^2, clamped so the last step cannot overshoot.
            current = min(current + v0 * t + 1 / 2 * a * t * t, distance)
            # Round the running total instead of each step, otherwise the
            # rounding errors pile up and the steps miss the target.
            step = round(current) - emitted
            emitted += step
            track.append(step)
        return track

    def get_slider(self):
        """
        Wait for the slider knob and return it.

        :return: the slider knob element
        :raises selenium.common.exceptions.TimeoutException: if the knob does
            not appear before ``self.wait`` times out
        """
        return self.wait.until(EC.presence_of_element_located(
            (By.XPATH, "//div[@class='gt_slider_knob gt_show']")))

    def move_to_gap(self, slider, track):
        """
        Drag the slider by replaying the steps of ``track``.

        :param slider: slider knob element
        :param track: list of horizontal offsets; left unmodified
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        # The steps are replayed in random order, as the original did.
        # NOTE(review): this discards the accelerate/decelerate ordering
        # produced by get_track - confirm that is intended.
        steps = list(track)
        random.shuffle(steps)
        for step in steps:
            ActionChains(self.browser).move_by_offset(
                xoffset=step, yoffset=0).perform()
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def crack(self):
        """
        Run the whole workflow once: query, fetch images, find gap, drag.
        """
        self.open()
        bg_filename = 'bg.jpg'
        fullbg_filename = 'fullbg.jpg'
        bg_location_list, fullbg_location_list = self.get_images(
            bg_filename, fullbg_filename)
        # Undo the slice scrambling of both pictures.
        bg_img = self.get_merge_image(bg_filename, bg_location_list)
        fullbg_img = self.get_merge_image(fullbg_filename, fullbg_location_list)
        gap = self.get_gap(fullbg_img, bg_img)
        print('缺口位置', gap)
        track = self.get_track(gap - self.BORDER)
        print('滑動滑塊')
        print(track)
        slider = self.get_slider()
        self.move_to_gap(slider, track)
if __name__ == '__main__':
    # Script entry point: run the full captcha workflow for one sample query.
    print('開始驗證')
    solver = Crack(keyword=u'中國移動')
    solver.crack()
    print('驗證成功')
運行上述代碼,即實現滑動驗證碼破解,再看下那個nice的瞬間吧。

五、總結
如果破解極驗驗證碼困難 可以有一個挺好的解決方案 http://jiyandoc.c2567.com/