1.圖形驗證碼:
中國知網:http://my.cnki.net/elibRegister/CommonRegister.aspx
import tesserocr
from PIL import Image

# Grey levels below this value are mapped to 0 (black), all others to 1
# (white).  180 is the value used by the original listing.
threshold = 180

# Open inside a context manager so the underlying file handle is released
# as soon as the greyscale copy has been produced.
with Image.open('code2.jpg') as source:
    # 'L' = 8-bit greyscale, so a single 256-entry lookup table applies.
    image = source.convert('L')

# Lookup table indexed by grey level: 0 below the threshold, 1 otherwise.
table = [0 if level < threshold else 1 for level in range(256)]

# Binarise the image through the lookup table; the output mode is '1'
# (1-bit pixels).  Calling image.convert('1') instead would use Pillow's
# built-in conversion rather than the custom threshold above.
image = image.point(table, '1')
# image.show()  # uncomment to inspect the binarised image before OCR

# Run OCR on the cleaned-up image and print the recognised text.
result = tesserocr.image_to_text(image)
print(result)
2. 極驗滑動驗證碼的識別
https://www.geetest.com/Sensebot
對於應用了極驗驗證碼的網站,如果我們直接模擬表單提交,加密參數的構造是個問題,需要分析其加密和校驗邏輯,相對煩瑣。所以我們採用直接模擬瀏覽器動作的方式來完成驗證。
可以使用 Selenium 來完全模擬人的行為的方式來完成驗證,此驗證成本相比直接去識別加密算法少很多。
https://account.geetest.com/login
(1)模擬點擊驗證按鈕。
(2)識別滑動缺口的位置。
(3)模擬拖動滑塊。
import time
from io import BytesIO

from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

EMAIL = 'zcs@163.com'
PASSWORD = '123'
# Pixels subtracted from the detected gap column before dragging.
BORDER = 6
# First column scanned when looking for the gap.
INIT_LEFT = 60
# Per-channel difference below which two pixels count as equal.
PIXEL_THRESHOLD = 60
# Extra attempts made by crack() after a failed verification.
MAX_RETRIES = 3


class CrackGeetest():
    """Drive a Chrome browser through the Geetest slider on the login page."""

    def __init__(self):
        self.url = 'https://account.geetest.com/login'
        self.browser = webdriver.Chrome()
        # Every wait below times out after 20 seconds.
        self.wait = WebDriverWait(self.browser, 20)
        self.email = EMAIL
        self.password = PASSWORD

    def __del__(self):
        # __init__ may have failed before the browser attribute was set.
        browser = getattr(self, 'browser', None)
        if browser is not None:
            # quit() also ends the driver session; close() would only close
            # the current window.
            browser.quit()

    def get_geetest_button(self):
        """
        Wait for the initial verification button to become clickable.
        :return: the button element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_radar_tip')))

    def get_position(self):
        """
        Locate the captcha image on the page.
        :return: tuple (top, bottom, left, right) of the captcha image
        """
        img = self.wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'geetest_canvas_img')))
        # Pause before reading the geometry, as the original listing does.
        time.sleep(2)
        location = img.location
        size = img.size
        top = location['y']
        left = location['x']
        bottom = top + size['height']
        right = left + size['width']
        return (top, bottom, left, right)

    def get_screenshot(self):
        """
        Take a screenshot of the current page.
        :return: the screenshot as a PIL image
        """
        png_bytes = self.browser.get_screenshot_as_png()
        return Image.open(BytesIO(png_bytes))

    def get_slider(self):
        """
        Wait for the slider handle to become clickable.
        :return: the slider element
        """
        return self.wait.until(
            EC.element_to_be_clickable((By.CLASS_NAME, 'geetest_slider_button')))

    def get_geetest_image(self, name='captcha.png'):
        """
        Crop the captcha out of a page screenshot and save it to disk.
        :param name: file name the cropped image is saved under
        :return: the cropped captcha as a PIL image
        """
        top, bottom, left, right = self.get_position()
        print('驗證碼位置', top, bottom, left, right)
        screenshot = self.get_screenshot()
        # crop() takes its box as (left, upper, right, lower).
        captcha = screenshot.crop((left, top, right, bottom))
        captcha.save(name)
        return captcha

    def open(self):
        """
        Open the login page and type in the e-mail address and password.
        :return: None
        """
        self.browser.get(self.url)
        email = self.wait.until(EC.presence_of_element_located((By.ID, 'email')))
        password = self.wait.until(EC.presence_of_element_located((By.ID, 'password')))
        email.send_keys(self.email)
        password.send_keys(self.password)

    @staticmethod
    def _pixels_match(pixel1, pixel2, threshold=PIXEL_THRESHOLD):
        """Return True when the first three channels all differ by less than threshold."""
        return all(abs(a - b) < threshold for a, b in zip(pixel1[:3], pixel2[:3]))

    def get_gap(self, image1, image2, left=INIT_LEFT):
        """
        Find the x offset of the gap by comparing the two captcha images.
        :param image1: image without the gap
        :param image2: image with the gap
        :param left: first column to scan
        :return: x of the first column containing a differing pixel, or
                 ``left`` when no pixel differs
        """
        # Load the pixel-access objects once instead of once per pixel.
        pixels1 = image1.load()
        pixels2 = image2.load()
        width, height = image1.size
        for x in range(left, width):
            for y in range(height):
                if not self._pixels_match(pixels1[x, y], pixels2[x, y]):
                    return x
        return left

    def is_pixel_equal(self, image1, image2, x, y):
        """
        Compare one pixel position in two images.
        :param image1: first image
        :param image2: second image
        :param x: x coordinate
        :param y: y coordinate
        :return: True when the two pixels are considered equal
        """
        pixel1 = image1.load()[x, y]
        pixel2 = image2.load()[x, y]
        return self._pixels_match(pixel1, pixel2)

    def get_track(self, distance):
        """
        Build a list of per-step x offsets that add up to ``distance``.

        The motion accelerates (a = 2) over the first 4/5 of the distance
        and decelerates (a = -3) afterwards.
        :param distance: total offset to cover
        :return: list of integer step offsets
        """
        track = []
        # Exact (float) position reached so far.
        current = 0
        # Integer position already emitted into the track.
        emitted = 0
        # Position at which deceleration starts.
        mid = distance * 4 / 5
        # Time step.
        t = 0.2
        # Current velocity.
        v = 0

        while current < distance:
            a = 2 if current < mid else -3
            v0 = v
            # v = v0 + a*t
            v = v0 + a * t
            # x = v0*t + 1/2 * a * t^2
            current += v0 * t + 1 / 2 * a * t * t
            # Round the cumulative position, clamped to the target, rather
            # than each step: per-step rounding errors would accumulate and
            # the final step could overshoot the gap.
            target = round(min(current, distance))
            track.append(target - emitted)
            emitted = target
        return track

    def move_to_gap(self, slider, track):
        """
        Drag the slider along the given track and release it.
        :param slider: slider element
        :param track: list of x offsets, one per step
        :return: None
        """
        ActionChains(self.browser).click_and_hold(slider).perform()
        for x in track:
            ActionChains(self.browser).move_by_offset(xoffset=x, yoffset=0).perform()
        # Pause before letting go of the slider.
        time.sleep(0.5)
        ActionChains(self.browser).release().perform()

    def login(self):
        """
        Click the login button.
        :return: None
        """
        submit = self.wait.until(EC.element_to_be_clickable((By.CLASS_NAME, 'login-btn')))
        submit.click()
        time.sleep(10)
        print('登錄成功')

    def crack(self, max_retries=MAX_RETRIES):
        """
        Run the whole flow: fill the form, solve the slider, then log in.

        WebDriverWait.until raises TimeoutException when the success text
        never appears, so a failed attempt is detected by catching that
        exception; the flow is then restarted from the login page.
        :param max_retries: extra attempts allowed after the first failure
        :return: None
        :raises TimeoutException: when the last attempt also fails
        """
        attempts = max(0, max_retries) + 1
        for attempt in range(1, attempts + 1):
            # Fill in the e-mail address and password.
            self.open()
            # Click the verification button.
            button = self.get_geetest_button()
            button.click()
            # Capture the captcha before the gap is shown.
            image1 = self.get_geetest_image('captcha1.png')
            # Click the slider so the gap appears.
            slider = self.get_slider()
            slider.click()
            # Capture the captcha with the gap.
            image2 = self.get_geetest_image('captcha2.png')
            # Locate the gap.
            gap = self.get_gap(image1, image2)
            print('缺口位置', gap)
            # Compensate for the border offset.
            gap -= BORDER
            # Build the movement track.
            track = self.get_track(gap)
            print('滑動軌跡', track)
            # Drag the slider.
            self.move_to_gap(slider, track)

            try:
                success = self.wait.until(
                    EC.text_to_be_present_in_element(
                        (By.CLASS_NAME, 'geetest_success_radar_tip_content'), '驗證成功'))
            except TimeoutException:
                if attempt == attempts:
                    raise
                # Verification failed: start over.
                continue
            print(success)
            self.login()
            return


if __name__ == '__main__':
    crack = CrackGeetest()
    crack.crack()
但是,當我們截取圖片的時候,網站將圖片分割為不同的圖片隨機組合,我們就無法使用這一方法。
3.點觸驗證碼的識別
點觸的網址掛了。
4. 微博宮格識別