import random
import time

import cv2
from matplotlib import pyplot as plt  # only needed by the optional histogram plot noted below
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains


class slide():
    """Log in to bilibili through Chrome and attempt its geetest slider captcha.

    The workflow (see ``run``) takes two page screenshots -- one with the
    complete background shown, one with the gap shown -- crops both to the
    captcha area, locates the gap column by pixel difference and drags the
    slider button by the scaled distance.
    """

    # Pixel adjustments added to the ``geetest_widget`` bounding box so that the
    # crop contains the captcha picture; tune them for your own screen.
    _CROP_LEFT = 220
    _CROP_TOP = 57
    _CROP_RIGHT = 295
    _CROP_BOTTOM = 113

    # Initialisation
    def __init__(self):
        """Start Chrome, open the login page, fill in the form and submit it."""
        self.driver = webdriver.Chrome(executable_path='..')  # path to your own chromedriver
        self.driver.maximize_window()
        self.trance = 0
        self.driver.get("https://passport.bilibili.com/login")
        self.driver.find_element_by_id('login-username').send_keys('....')  # user name
        self.driver.find_element_by_id('login-passwd').send_keys('....')  # password
        self.driver.find_element_by_class_name('btn-login').click()
        print('login..........')
        time.sleep(3)

    def _capture_and_crop(self, screenshot_path, cropped_path):
        """Save a full-page screenshot and a crop of the captcha area.

        Reads ``self.location`` and ``self.size`` (set by
        ``before_deal_image``) and stores the crop box in ``self.left``,
        ``self.top``, ``self.right`` and ``self.bottom``; the cropped image is
        kept in ``self.im`` and written to ``cropped_path``.
        """
        self.driver.save_screenshot(screenshot_path)  # whole-page screenshot
        self.left = self.location['x'] + self._CROP_LEFT
        self.top = self.location['y'] + self._CROP_TOP
        self.right = self.location['x'] + self.size['width'] + self._CROP_RIGHT
        self.bottom = self.location['y'] + self.size['height'] + self._CROP_BOTTOM
        # print(self.location, self.size)
        # The context manager closes the screenshot file; the crop is saved
        # while the source image is still open.
        with Image.open(screenshot_path) as page:
            self.im = page.crop((self.left, self.top, self.right, self.bottom))
            self.im.save(cropped_path)

    # Page screenshot showing the complete background image
    def before_deal_image(self):
        """Show the full-background canvas and capture 'ele_capture1.png'.

        Also records the widget's location and size, which
        ``after_deal_image`` reuses.
        """
        self.js1 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="block"'
        self.driver.execute_script(self.js1)
        time.sleep(2)
        widget = self.driver.find_element_by_class_name('geetest_widget')
        self.location = widget.location
        self.size = widget.size
        time.sleep(2)
        self._capture_and_crop('capture1.png', 'ele_capture1.png')

    # Move the slider
    def slide(self, num):
        """Drag the slider button ``num`` pixels to the right in three steps.

        The drag is split into 3/4 of the distance, the remainder minus 3,
        and a final leg of about 3 pixels, with pauses in between, to look
        less like a script.
        """
        self.num = num
        print('滑塊應該移動距離------------->', self.num)
        self.button = self.driver.find_element_by_class_name('geetest_slider_button')
        # Imitate human behaviour
        first_distance = self.num / 4 * 3
        second_distance = self.num - first_distance - 3
        # Whatever is left after the first two legs (3, up to float rounding).
        third_distance = self.num - second_distance - first_distance
        # NOTE(review): every chain re-issues click_and_hold on the button, as
        # in the original script; left unchanged because it is the behaviour
        # the author reports as working.
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(first_distance, random.random()).perform()
        time.sleep(0.5)
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(second_distance, random.random()).perform()
        time.sleep(0.9)
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(third_distance, random.random()).release().perform()

    # Page screenshot showing the background image with the gap
    def after_deal_image(self):
        """Hide the full-background canvas and capture 'ele_capture2.png'.

        Must be called after ``before_deal_image``, which provides
        ``self.location`` and ``self.size``.
        """
        self.js2 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="none"'
        self.driver.execute_script(self.js2)
        self._capture_and_crop('capture2.png', 'ele_capture2.png')

    # --------------------- compute the drag distance ---------------------
    def slide_distance(self, image1, image2, threshold=86, start_x=75, offset=7):
        """Return the x position of the gap, or ``None`` when none is found.

        Args:
            image1: path of the complete background crop.
            image2: path of the crop that shows the gap.
            threshold: a pixel counts as part of the gap when the R, G and B
                differences are all strictly greater than this value.
            start_x: first column scanned (the original author uses 75 to
                skip the slider piece on the left).
            offset: amount subtracted from the matching column before it is
                printed and returned.

        Returns:
            ``column - offset`` for the first differing pixel found scanning
            column by column, top to bottom; ``None`` if nothing differs.
        """
        with Image.open(image2) as cut_file, Image.open(image1) as full_file:
            # Normalise to RGB so getpixel always yields an (R, G, B) tuple;
            # convert() also loads the data before the files are closed.
            cut_image = cut_file.convert('RGB')
            full_image = full_file.convert('RGB')
        width, height = cut_image.size
        for i in range(start_x, width):
            for j in range(height):
                pixel1 = cut_image.getpixel((i, j))
                pixel2 = full_image.getpixel((i, j))
                if all(abs(a - b) > threshold for a, b in zip(pixel1, pixel2)):
                    print(i - offset)
                    return i - offset
        return None

    # --------------------- similarity handling ---------------------
    def classify_gray_hist(self, image1, image2, size=(328, 211)):
        """Return a histogram-overlap score for two images.

        Both images are resized to ``size`` (the screenshot crop size) and a
        256-bin histogram of channel 0 is computed for each. For bins 75..255
        the per-bin agreement ``1 - |h1 - h2| / max(h1, h2)`` (1 for equal
        bins) is summed and divided by the total number of bins.

        Returns:
            Normally a one-element array (histogram rows are one-element
            arrays); a plain float when every compared bin is equal.
        """
        image1 = cv2.resize(image1, size)
        image2 = cv2.resize(image2, size)
        # calcHist requires its arguments wrapped in lists.
        hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 255.0])  # image with the gap
        hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 255.0])  # complete background
        # To inspect the histograms:
        # plt.plot(range(256), hist1, 'r'); plt.plot(range(256), hist2, 'b'); plt.show()
        # Degree of overlap between the two histograms.
        # NOTE(review): the loop starts at bin 75 but the sum is divided by all
        # 256 bins -- kept as in the original; confirm this is intended.
        degree = 0
        for i in range(75, len(hist1)):
            if hist1[i] != hist2[i]:
                degree = degree + (1 - abs(hist1[i] - hist2[i]) / max(hist1[i], hist2[i]))
            else:
                degree = degree + 1
        degree = degree / len(hist1)
        return degree

    # --------------------- obtain the similarity ---------------------
    def run0(self, image1, image2):
        """Load two image files and return their similarity as an integer percent."""
        img1 = cv2.imread(image1)
        img2 = cv2.imread(image2)
        degree = self.classify_gray_hist(img1, img2)
        # classify_gray_hist yields a plain number when all compared bins
        # match and a one-element array otherwise; accept both.
        score = degree if isinstance(degree, (int, float)) else degree[0]
        percent = int(100 * score)
        # Similarity between the complete background and the captcha image.
        print('兩張圖片相似度為。。。。。', percent)
        cv2.waitKey(0)
        return percent

    # Entry point
    def run(self):
        """Run the whole captcha workflow once and always close the browser."""
        try:
            self.before_deal_image()
            self.after_deal_image()
            self.num = self.slide_distance('ele_capture1.png', 'ele_capture2.png')
            print('原始距離--------->', self.num)
            self.image1 = 'ele_capture1.png'
            self.image2 = 'ele_capture2.png'
            self.result = self.run0(self.image1, self.image2)
            if self.num is None:
                # Fail with a clear reason instead of a TypeError from None * float.
                raise ValueError('no gap found between ele_capture1.png and ele_capture2.png')
            # Ratio between the real captcha width and the screenshot crop
            # width; adjust both numbers for your own setup.
            real_distance = self.num * (262 / 328)
            print('按照圖像大小比列計算實際移動距離', real_distance)
            self.slide(real_distance)
            time.sleep(6)
        except Exception as exc:
            # Report why the attempt failed rather than hiding every error;
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print('login_out..............')
            print('reason:', repr(exc))
            time.sleep(7)
        finally:
            self.driver.quit()
            print('程序運行結束')


slide().run()
透明度 0 和 1 之間的色差在 85、86 左右徘徊,R、G、B 三個值都相等,均為左邊的 85 或 86。因此閾值取 86 或 85 均可,即上面函數中的 threshold。主要的難點就是缺口位置的查找,故此需要一個函數來測試閾值是否符合要求,見下面的介紹。
import cv2
from PIL import Image


def get_distance(cut_image, full_image, threshold=86, start_x=75, offset=7):
    """Return the x position of the captcha gap, or ``None`` if none is found.

    Args:
        cut_image: path of the background image that shows the gap.
        full_image: path of the complete background image.
        threshold: a pixel counts as part of the gap when the R, G and B
            differences are all strictly greater than this value.
        start_x: first column scanned; the author uses 75, the distance from
            the right edge of the slider's shadow to the left edge of the
            picture, and says it must be tuned per setup.
        offset: amount subtracted from the matching column before it is
            printed and returned.

    Returns:
        ``column - offset`` for the first differing pixel found scanning
        column by column, top to bottom; ``None`` if nothing differs.
    """
    with Image.open(cut_image) as cut_file, Image.open(full_image) as full_file:
        # Normalise to RGB so getpixel always yields an (R, G, B) tuple;
        # convert() also loads the data before the files are closed.
        gap_picture = cut_file.convert('RGB')
        full_picture = full_file.convert('RGB')
    width, height = gap_picture.size
    for i in range(start_x, width):
        for j in range(height):
            pixel1 = gap_picture.getpixel((i, j))
            pixel2 = full_picture.getpixel((i, j))
            if all(abs(a - b) > threshold for a, b in zip(pixel1, pixel2)):
                print(i - offset)
                return i - offset
    return None


# Arguments: (image with the gap, complete background image)
get_distance('ele_capture2.png', 'ele_capture1.png')
執行完第一個類,如果沒登陸成功,就執行上面這個函數,更改 threshold,並且將第一個類中的 slide_distance() 函數替換。上面函數中的 75 均為滑塊截圖最右邊陰影到圖片最左端的長度,這個得自己修改。
還有另一個方法就是移動滑塊進行截圖:86 的色差會變小,然後移動距離也變小;每移動一次就進行一次圖片相似度處理,與完整背景圖的相似度達到 90% 以上後,將移動的距離保留,進行行為處理,模擬人的移動方法,然後移動滑塊,也可以成功。這需要圖像算法,我是不會,但我找到資料後測試過,也能通過驗證。慢,但是准確度 100%。
如果都能成功,就可以將driver設置為無頭模式,讓selenium不在界面上顯示。
後續會出更好的爬蟲博文。
喜歡就點個贊,萌萌噠。