1. 採用網站截圖方式
import time

import requests
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By


def part_screenshot(driver, path="hello1.png"):
    """Save a screenshot of the current browser window and open it with PIL.

    Args:
        driver: A Selenium WebDriver instance with a page already loaded.
        path: File the screenshot is written to (and then re-opened from).

    Returns:
        The screenshot as a ``PIL.Image.Image``.
    """
    driver.save_screenshot(path)
    return Image.open(path)


def get_image(driver):
    """Crop the target element (the captcha) out of a full-window screenshot.

    The element is located by XPath, its on-page position and size are read,
    and that rectangle is cut out of a screenshot of the whole window.

    Args:
        driver: A Selenium WebDriver instance with the target page loaded.

    Returns:
        A ``PIL.Image.Image`` containing only the located element.
    """
    # Locate the captcha element. ``find_element(By.XPATH, ...)`` works on both
    # Selenium 3 and 4; the old ``find_element_by_xpath`` helper was removed
    # in Selenium 4.3.
    img = driver.find_element(By.XPATH, '//*[@id="u1"]/a[2]')
    # Give the page a moment to settle before reading the element geometry.
    time.sleep(2)

    location = img.location
    print(location, 111)
    size = img.size

    # Bounding box of the element: (left, top, right, bottom).
    # NOTE(review): this assumes screenshot pixels match the coordinates
    # reported by the driver (i.e. no display scaling) - confirm on HiDPI.
    left = location['x']
    top = location['y']
    right = left + size['width']
    bottom = top + size['height']

    page_snap_obj = part_screenshot(driver)
    # The cropped region is the captcha image.
    return page_snap_obj.crop((left, top, right, bottom))


if __name__ == '__main__':
    driver = webdriver.Chrome()
    try:
        driver.get("https://www.baidu.com")
        print(driver.title)  # Print the page title.
        b = get_image(driver)
        b.save("1.png")
        print(b)
    finally:
        # Always quit, even on failure; otherwise the browser/driver
        # processes are left running.
        driver.quit()
2. 採用 cookie 獲取圖片驗證碼
原理:瀏覽器請求圖片驗證碼時,有些網站會將圖片驗證碼的編號信息存儲到 cookie 中;用戶提交登錄時,只需提交圖片驗證碼的值即可,後端通過 cookie 驗證圖片驗證碼編號是否正確。
class Yun00Da1ma():
    """Log in to a captcha-protected site while keeping one HTTP session.

    The captcha image and the login request are sent through the same
    ``requests`` session, so any cookies set when the captcha is fetched are
    sent back automatically with the login request.
    """

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` can block indefinitely on a stalled connection.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        self.session = requests.session()
        self.headers = {
            "Referer": "http://www.×××.com/",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"
        }

    def get_picture(self):
        """Download the captcha, recognise it and submit the login request.

        Returns:
            The ``requests.Response`` of the login request, so the caller can
            inspect whether the login succeeded.

        Raises:
            requests.RequestException: If a request fails or times out, or the
                captcha download returns an HTTP error status.
        """
        url = "http://www.×××.com/index/captcha"
        # Per the original author: when the image is requested, the server
        # returns it and records the captcha information against the session.
        resp = self.session.get(url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)
        # Do not feed an error page to the recogniser as if it were an image.
        resp.raise_for_status()
        with open("dama.png", "wb") as f:
            f.write(resp.content)

        # ``check_img`` is defined elsewhere; presumably it returns the
        # recognised captcha text - verify against its definition.
        img_result = check_img("dama.png")

        # Simulated login. The captcha value goes through ``params`` so it is
        # URL-encoded and appended to the existing query string.
        url1 = "http://www.×××.com/index/login?username=×××&password=×××&utype=ajh"
        resp2 = self.session.get(url1, params={"vcode": img_result},
                                 headers=self.headers,
                                 timeout=self.REQUEST_TIMEOUT)
        return resp2

    def get_user(self):
        """Request the user page with the kept session to verify the login.

        Returns:
            The ``requests.Response`` for the user page.

        Raises:
            requests.RequestException: If the request fails or times out.
        """
        url = "http://www.×××.com/user"
        return self.session.get(url, headers=self.headers,
                                timeout=self.REQUEST_TIMEOUT)


if __name__ == '__main__':
    yun00da1ma = Yun00Da1ma()
    yun00da1ma.get_picture()
    yun00da1ma.get_user()