python爬蟲--------處理極驗驗證(滑塊拼圖驗證)bilibili模式


from selenium import webdriver
from PIL import Image
import time
import random
from selenium.webdriver import ActionChains
import cv2
from matplotlib import pyplot as plt





class slide():
    """Log in to bilibili with Selenium and solve the GeeTest slider captcha.

    Workflow (see ``run``): screenshot the captcha with the complete
    background shown, screenshot it again with the gap visible, locate the
    gap by comparing both crops pixel by pixel, then drag the slider button
    by the scaled distance.

    The module is expected to provide ``webdriver``, ``ActionChains``,
    ``Image`` (Pillow), ``cv2``, ``time`` and ``random``.
    """

    # Pixel offsets added to the widget's location/size to build the crop box
    # (left, top, right, bottom). Tune them so the crop contains the whole
    # captcha image on your screen.
    _CROP_OFFSETS = (220, 57, 295, 113)

    # Initialisation
    def __init__(self):
        """Start Chrome, open the login page and submit the login form."""
        self.driver = webdriver.Chrome(executable_path='..')  # path to your own chromedriver
        self.driver.maximize_window()
        self.trance = 0
        self.driver.get("https://passport.bilibili.com/login")
        self.driver.find_element_by_id('login-username').send_keys('....')  # user name
        self.driver.find_element_by_id('login-passwd').send_keys('....')  # password
        self.driver.find_element_by_class_name('btn-login').click()
        print('login..........')
        # Give the captcha widget time to appear after the click.
        time.sleep(3)

    def _capture_widget(self, page_path, crop_path):
        """Screenshot the page to ``page_path`` and save the captcha crop.

        Uses ``self.location`` / ``self.size`` recorded by
        ``before_deal_image`` so both captures share the same crop box.
        """
        self.driver.save_screenshot(page_path)  # full-page screenshot

        left, top, right, bottom = self._CROP_OFFSETS
        self.left = self.location['x'] + left
        self.top = self.location['y'] + top
        self.right = self.location['x'] + self.size['width'] + right
        self.bottom = self.location['y'] + self.size['height'] + bottom

        # The context manager closes the screenshot file once the crop is saved.
        with Image.open(page_path) as page:
            self.im = page.crop((self.left, self.top, self.right, self.bottom))
            self.im.save(crop_path)

    # Page screenshot with the complete background image
    def before_deal_image(self):
        """Show the complete background and save it as ``ele_capture1.png``.

        Also records the widget's ``location`` and ``size`` on ``self``;
        ``after_deal_image`` relies on these values.
        """
        self.js1 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="block"'
        self.driver.execute_script(self.js1)
        time.sleep(2)

        widget = self.driver.find_element_by_class_name('geetest_widget')
        self.location = widget.location
        self.size = widget.size

        time.sleep(2)
        self._capture_widget('capture1.png', 'ele_capture1.png')

    @staticmethod
    def _split_distance(distance):
        """Split ``distance`` into three integer legs that sum to its rounding.

        The first leg covers three quarters of the way, the last leg is a
        fixed 3 px nudge and the middle leg takes the remainder. Integer
        legs are used because Selenium converts pointer offsets with
        ``int()``, so fractional legs would be truncated and could leave
        the slider short of the target.
        """
        total = int(round(distance))
        first = total * 3 // 4
        third = 3
        second = total - first - third
        return first, second, third

    # Slider movement
    def slide(self, num):
        """Drag the slider button ``num`` pixels to the right in three legs.

        The pauses between legs and the small random vertical offset are a
        crude imitation of a human drag.
        """
        self.num = num
        print('滑塊應該移動距離------------->', self.num)

        self.button = self.driver.find_element_by_class_name('geetest_slider_button')
        first_distance, second_distance, third_distance = self._split_distance(self.num)

        # Press the button once; the pointer stays pressed across perform()
        # calls until release(), so later legs only need to move.
        ActionChains(self.driver).click_and_hold(self.button).move_by_offset(first_distance, random.random()).perform()
        time.sleep(0.5)
        ActionChains(self.driver).move_by_offset(second_distance, random.random()).perform()
        time.sleep(0.9)
        ActionChains(self.driver).move_by_offset(third_distance, random.random()).release().perform()

    # Page screenshot with the gap visible in the background
    def after_deal_image(self):
        """Hide the complete background and save the crop as ``ele_capture2.png``.

        Must run after ``before_deal_image``, which records the crop box.
        """
        self.js2 = 'document.getElementsByClassName("geetest_canvas_fullbg")[0].style.display="none"'
        self.driver.execute_script(self.js2)

        self._capture_widget('capture2.png', 'ele_capture2.png')

    # --------------------- gap distance ---------------------

    @staticmethod
    def _first_gap_column(cut_pixel, full_pixel, width, height, threshold, start_x):
        """Return the first column where the two images clearly differ.

        ``cut_pixel`` and ``full_pixel`` are callables taking an ``(x, y)``
        tuple and returning a pixel whose first three items are R, G, B.
        Columns are scanned left to right from ``start_x``; a column matches
        when some pixel differs by more than ``threshold`` on all three
        channels. Returns ``None`` when no column matches.
        """
        for x in range(start_x, width):
            for y in range(height):
                pixel1 = cut_pixel((x, y))
                pixel2 = full_pixel((x, y))
                if all(abs(pixel1[c] - pixel2[c]) > threshold for c in range(3)):
                    return x
        return None

    # Compute the sliding distance
    def slide_distance(self, image1, image2, threshold=86, start_x=75, offset=7):
        """Return the gap's x position in the crop, or ``None`` if not found.

        Args:
            image1: path of the complete background crop.
            image2: path of the crop with the gap.
            threshold: minimum per-channel difference that marks the gap
                (the original note says the edge colour differs by about 85).
            start_x: first column scanned; columns before it are skipped so
                the slider piece itself is not mistaken for the gap.
            offset: value subtracted from the matching column before it is
                returned.
        """
        with Image.open(image2) as cut_file, Image.open(image1) as full_file:
            # Normalise to RGB so indexing channels 0..2 works for any mode.
            cut_image = cut_file.convert('RGB')
            full_image = full_file.convert('RGB')

        # Only scan the area both images share, to avoid out-of-range reads.
        width = min(cut_image.size[0], full_image.size[0])
        height = min(cut_image.size[1], full_image.size[1])
        column = self._first_gap_column(
            cut_image.getpixel, full_image.getpixel, width, height, threshold, start_x
        )
        if column is None:
            return None
        print(column - offset)
        return column - offset

    # --------------------- image similarity ---------------------

    @staticmethod
    def _histogram_overlap(hist1, hist2):
        """Return the mean per-bin overlap of two equally sized histograms.

        Identical bins contribute 1; differing bins contribute
        ``1 - |a - b| / max(a, b)``. The accumulator starts as a zero with
        the shape of one histogram bin, so the result stays indexable with
        ``[0]`` even when every bin is identical.
        """
        degree = hist1[0] * 0.0
        for bin1, bin2 in zip(hist1, hist2):
            if bin1 != bin2:
                degree = degree + (1 - abs(bin1 - bin2) / max(bin1, bin2))
            else:
                degree = degree + 1
        return degree / len(hist1)

    def classify_gray_hist(self, image1, image2, size=(328, 211)):  # size of the crop
        """Return the histogram similarity of two images as a one-element array.

        Both images are resized to ``size`` and a 256-bin histogram of
        channel 0 is computed for each (channel 0 is blue for images loaded
        with ``cv2.imread``). The histograms are compared bin by bin; a
        value of 1 means identical histograms.
        """
        image1 = cv2.resize(image1, size)
        image2 = cv2.resize(image2, size)
        # calcHist's upper bound is exclusive, so 256 is needed to count
        # pixels whose value is 255.
        hist1 = cv2.calcHist([image1], [0], None, [256], [0.0, 256.0])  # image with gap
        hist2 = cv2.calcHist([image2], [0], None, [256], [0.0, 256.0])  # complete background
        return self._histogram_overlap(hist1, hist2)

    # --------------------- similarity as a percentage ---------------------
    def run0(self, image1, image2):
        """Read both image files and return their similarity in percent (int)."""
        img1 = cv2.imread(image1)
        img2 = cv2.imread(image2)
        degree = self.classify_gray_hist(img1, img2)

        # Similarity between the complete background and the captcha image.
        percent = int(100 * degree[0])
        print('兩張圖片相似度為。。。。。', percent)
        return percent

    # Entry point
    def run(self):
        """Capture both images, compute the distance, drag the slider, quit.

        Any error is reported and the browser is always closed.
        """
        try:
            self.before_deal_image()
            self.after_deal_image()
            self.num = self.slide_distance('ele_capture1.png', 'ele_capture2.png')
            print('原始距離--------->', self.num)
            self.image1 = 'ele_capture1.png'
            self.image2 = 'ele_capture2.png'
            self.result = self.run0(self.image1, self.image2)
            if self.num is None:
                raise ValueError('gap not found: adjust threshold/start_x or the crop offsets')
            # Ratio between the real captcha width and the cropped screenshot
            # width; adjust it to your own screenshot.
            real_distance = self.num * (262 / 328)
            print('按照圖像大小比列計算實際移動距離', real_distance)
            self.slide(real_distance)
            time.sleep(6)
        except Exception as exc:
            print('login_out..............')
            # Show the cause instead of hiding it.
            print(repr(exc))
            time.sleep(7)
        finally:
            self.driver.quit()
            print('程序運行結束')



# Script entry point: constructing slide() opens Chrome and submits the login
# form; run() then captures the captcha, drags the slider and quits the browser.
slide().run()

  

 

 

   

  透明度為 0 和 1 時的色差在 85、86 左右徘徊,且 R、G、B 三個值都相等(均為左邊所示的 85、86),因此閾值取 86 或 85 均可,即上面函數中的 threshold。主要的難點是缺口位置的查找,因此需要下面這個函數來單獨測試閾值是否合適。

import cv2
from PIL import Image



def get_distance(cut_image, full_image, threshold=86, start_x=75, offset=7):
    """Return the gap's x position in the captcha crop, or ``None``.

    Columns are scanned left to right from ``start_x``. The first column
    holding a pixel whose R, G and B values all differ by more than
    ``threshold`` between the two images is taken as the gap; the value
    printed and returned is that column minus ``offset``.

    Args:
        cut_image: path of the background image with the gap.
        full_image: path of the complete background image.
        threshold: minimum per-channel difference that marks the gap.
        start_x: first column scanned; it should equal the distance from the
            image's left edge to the right edge of the slider's shadow, so
            the slider piece itself is skipped.
        offset: value subtracted from the matching column.
    """
    with Image.open(cut_image) as cut_file, Image.open(full_image) as full_file:
        # Normalise to RGB so indexing channels 0..2 works for any mode.
        cut = cut_file.convert('RGB')
        full = full_file.convert('RGB')

    # Only scan the area both images share, to avoid out-of-range reads.
    width = min(cut.size[0], full.size[0])
    height = min(cut.size[1], full.size[1])

    for x in range(start_x, width):
        for y in range(height):
            pixel1 = cut.getpixel((x, y))
            pixel2 = full.getpixel((x, y))
            # All three channel differences must exceed the threshold.
            if all(abs(pixel1[c] - pixel2[c]) > threshold for c in range(3)):
                print(x - offset)
                return x - offset
    return None

# Standalone check of the gap detection on the two saved crops.
get_distance('ele_capture2.png','ele_capture1.png')

# 'ele_capture2.png','ele_capture1.png' -> (background image with gap, complete background image)

  

 

  執行完第一個類後,如果沒有登錄成功,就執行上面這個函數並調整 threshold,再用它替換第一個類中的 slide_distance() 函數。上面函數中的 75 均為滑塊截圖最右邊陰影到圖片最左端的長度,這個需要根據自己的截圖修改。

 

 

  

  還有另一個方法:移動滑塊並進行截圖,此時 86 的色差會變小,移動的距離也隨之變小;每移動一次就做一次圖片相似度處理,當與完整背景圖的相似度達到 90% 以上時,將已移動的距離保留下來,再進行行為處理、模擬人的移動方式去移動滑塊,同樣可以成功。這需要圖像算法,我自己不會,但找到資料後測試過,也能通過驗證。這種方法慢,但是準確度 100%。

  

  如果都能成功,就可以將driver設置為無頭模式,讓selenium不在界面上顯示。

 

 

  后續會出更好的爬蟲博文。

  喜歡就點個贊,萌萌噠。


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM