selenium處理極驗滑動驗證碼


要爬取一個網站時遇到了極驗的驗證碼,這周都在想着怎么破解這個。網上搜了好多,在知乎上看到有人問了這個問題:https://www.zhihu.com/question/28833985 ,我按照這思路大概實現了一下。

1.使用htmlunit(這種方式我沒成功,模擬鼠標拖拽后軌跡沒生成,可以跳過)

我用的是java,首先想到的是直接用htmlunit,於是先做了點初始化

private void initWebClient() {
        // Lazy one-time setup: an existing client is left exactly as it is.
        if (webClient == null) {
            webClient = new WebClient(BrowserVersion.FIREFOX_24);

            // Network: go through the local proxy on port 8888 and accept any certificate.
            webClient.getOptions().setProxyConfig(new ProxyConfig("127.0.0.1",8888));
            webClient.getOptions().setActiveXNative(true);
            webClient.getOptions().setUseInsecureSSL(true);

            // Rendering: run JavaScript and CSS, but keep CSS problems quiet.
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(true);
            webClient.setCssErrorHandler(new SilentCssErrorHandler());

            // Script errors and non-2xx responses must not abort the page load.
            webClient.getOptions().setThrowExceptionOnScriptError(false);
            webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);

            // Carry over the cookies already held by the HTTP client (obtained by other means).
            CookieManager manager = new CookieManager();
            List<org.apache.http.cookie.Cookie> existing = client.getCookies();
            for (int idx = 0; idx < existing.size(); idx++) {
                manager.addCookie(new com.gargoylesoftware.htmlunit.util.Cookie(existing.get(idx)));
            }
            webClient.setCookieManager(manager);
        }
    }

初始化代理,cookie..然后就能正常調用了

// Load the login page with the configured client, then pass it to gePageInfor.
HtmlPage page = webClient.getPage("http://www.qixin.com/login");// Qixinbao (qixin.com)
gePageInfor(page);

下面就是我獲取圖片,還原圖片並且模擬拖拽,(這里我覺得是有些問題的,可能是拖拽我模擬的不對導致觸發的js並沒有生成正確的軌跡,還請大家幫忙看看哪里錯了)

private void gePageInfor(HtmlPage page) {
        // (tag, attribute, value) triples that select the slice <div>s of the two captcha images.
        final String[] fullBgSelector = {"div", "class", "gt_cut_fullbg_slice"};
        final String[] cutBgSelector = {"div", "class", "gt_cut_bg_slice"};
        HtmlDivision captcha = (HtmlDivision) page.getElementById("captcha");

        // Stays 0 when the images cannot be fetched or decoded; mouseMoveX then uses its fallback.
        int offsetX = 0;
        try {
            // Download the raw bytes of both images.
            String fullBgUrl = getImgUrl(fullBgSelector, captcha, true);
            byte[] fullBgBytes = client.get(fullBgUrl).getBinary();
            String cutBgUrl = getImgUrl(cutBgSelector, captcha, false);
            byte[] cutBgBytes = client.get(cutBgUrl).getBinary();

            // Rebuild the images from their shuffled slices.
            BufferedImage fullBg = ImgTest.getGeetestImg(fullBgBytes, ImgTest.imgArray);
            BufferedImage cutBg = ImgTest.getGeetestImg(cutBgBytes, ImgTest.imgArray);

            // Drag distance (known to be unreliable; a third-party image matcher is needed).
            offsetX = ImgTest.deCAPTCHA(fullBg, cutBg);
            System.out.println(offsetX);
        } catch (IOException | FetchException e) {
            e.printStackTrace();
        }

        // Simulated drag: hover and press the idle knob, move, hover again, release.
        HtmlDivision idleKnob = get_div_slider_knob(page, "gt_slider_knob gt_show");
        HtmlPage hoverPage = (HtmlPage) idleKnob.mouseOver();
        HtmlPage pressedPage = (HtmlPage) idleKnob.mouseDown();

        // After the press the knob is looked up again under its "moving" class.
        HtmlDivision movingKnob = get_div_slider_knob(pressedPage, "gt_slider_knob gt_show moving");
        mouseMoveX(offsetX, movingKnob, pressedPage);

        HtmlPage draggedPage = (HtmlPage) movingKnob.mouseOver();
        // (A second mouseDown on the knob was tried at this point and is disabled.)
        System.out.println(draggedPage.asXml());

        HtmlDivision draggedCaptcha = (HtmlDivision) draggedPage.getElementById("captcha");
        HtmlElement movingSlice =
                draggedCaptcha.getElementsByAttribute("div", "class", "gt_slice gt_show moving").get(0);
        System.out.println(movingSlice);

        // Releasing fires the page's JS, but no drag trajectory gets generated this way.
        HtmlPage releasedPage = (HtmlPage) movingKnob.mouseUp();
        System.out.println("---------------");
        System.out.println(releasedPage.asXml());

        if (releasedPage.getElementById("captcha") != null) {
            // Captcha still present: retrying via gePageInfor(releasedPage) is left disabled.
        }
    }

    /**
     * Fires a single mouse-move event on the document element of {@code mouseDown},
     * with the event's client X shifted to the right.
     *
     * @param deCAPTCHA       horizontal distance to move; 0 means "unknown" and falls back to 99
     * @param div_slider_knob element the event is created for
     * @param mouseDown       page whose document element receives the event
     */
    private void mouseMoveX(int deCAPTCHA, HtmlDivision div_slider_knob, HtmlPage mouseDown) {
        int shift;
        if (deCAPTCHA == 0) {
            shift = 99;
        } else {
            shift = deCAPTCHA;
        }

        MouseEvent move = new MouseEvent(div_slider_knob, MouseEvent.TYPE_MOUSE_MOVE, false, false, false, MouseEvent.BUTTON_LEFT);
        // Shift the X coordinate relative to the event's initial client X.
        move.setClientX(move.getClientX() + shift);
        mouseDown.getDocumentElement().fireEvent(move);
    }
    /**
     * Returns the first {@code <div>} inside the element with id "captcha" whose
     * {@code class} attribute equals {@code classString}.
     *
     * @param page        page that contains the captcha widget
     * @param classString exact value of the class attribute to look for
     * @return the first matching division (lookup fails with an exception when none matches)
     */
    private HtmlDivision get_div_slider_knob(HtmlPage page,String classString) {
        HtmlDivision captcha = (HtmlDivision) page.getElementById("captcha");
        return (HtmlDivision) captcha.getElementsByAttribute("div", "class", classString).get(0);
    }

    /**
     * Extracts the background-image URL shared by the captcha slice {@code <div>}s and,
     * optionally, refreshes the slice-origin table {@code ImgTest.imgArray} from each
     * slice's {@code background-position}.
     *
     * <p>Each slice is expected to carry an inline style whose first declaration holds
     * {@code url(...)} and whose second holds the position, e.g.
     * {@code background-position: -157px -58px}. Only the digits are kept, so the stored
     * origins are the absolute values of the CSS offsets.
     *
     * @param img_slice          selector triple: tag name, attribute name, attribute value
     * @param div                container element that is searched for slices
     * @param isNeedCheckPostion whether to compare every slice position against
     *                           {@code ImgTest.imgArray} and update entries that differ
     *                           (both images are cut the same way, so one pass is enough)
     * @return the image URL, or an empty string when no slice is found
     */
    private String getImgUrl(String[] img_slice, HtmlDivision div, boolean isNeedCheckPostion) {
        String url ="";
        // Query once; an empty result simply skips the loop.
        List<HtmlElement> slices = div.getElementsByAttribute(img_slice[0],img_slice[1],img_slice[2]);
        for (int i = 0; i < slices.size(); i++) {
            HtmlDivision sliceDiv = (HtmlDivision) slices.get(i);
            String[] declarations = sliceDiv.getAttribute("style").split(";");

            if (StringUtils.isBlank(url)) {
                // Strip everything up to "(" and the closing ")" to leave the bare URL.
                url = StringUtils.replacePattern(declarations[0], ".*\\(", "").replace(")", "");
            }
            if (!isNeedCheckPostion) {
                if (!StringUtils.isBlank(url)) {
                    break; // URL found and no position check requested: nothing left to do
                }
                continue;
            }

            // Verify EVERY slice; the flag used to be cleared after the first one,
            // which left all other table entries unchecked.
            if (declarations.length < 2 || i >= ImgTest.imgArray.length) {
                logger.debug("skip slice " + i + ": no position declaration or no table entry");
                continue;
            }
            String[] positionS = StringUtils.split(StringUtils.removePattern(declarations[1], "[^\\d+ \\s]"),null);
            if (positionS == null || positionS.length < 2) {
                logger.debug("skip slice " + i + ": unparsable position '" + declarations[1] + "'");
                continue;
            }

            // A fresh array per slice, so table entries never alias each other.
            int[] postion = {Integer.parseInt(positionS[0]), Integer.parseInt(positionS[1])};
            int[] known = ImgTest.imgArray[i];
            if (known == null || known[0] != postion[0] || known[1] != postion[1]) {
                logger.debug("更新分割postion");
                ImgTest.imgArray[i] = postion;
            }
        }
        if (isNeedCheckPostion && !slices.isEmpty()) {
            // Print the table contents rather than the array's identity hash.
            System.out.println(java.util.Arrays.deepToString(ImgTest.imgArray));
        }
        return url;
    }

對比圖片獲取位移的方法(deCAPTCHA)是錯的,我就不放代碼了。下面是其中還原圖片用的方法,其實審查元素后你就明白怎么還原這個圖片了,這里每次讀取的切片大小是10px×58px

/**
     * Rebuilds a shuffled captcha image from its slices.
     *
     * <p>{@code imgArray[i]} gives the (x, y) origin of the i-th slice inside the source
     * image; every slice is 10px wide and 58px high. The first half of the slices is
     * joined left-to-right into the upper row, the second half into the lower row, and
     * the upper row is then stacked on top of the lower one.
     *
     * @param binary   encoded bytes of the shuffled image
     * @param imgArray slice origins in output order; needs at least two entries
     * @return the reassembled image
     * @throws IOException              if the bytes cannot be decoded as an image
     * @throws IllegalArgumentException if fewer than two slice origins are supplied
     */
public static BufferedImage getGeetestImg(byte[] binary, int[][] imgArray) throws IOException {
        final int sliceWidth = 10;
        final int sliceHeight = 58;

        BufferedImage img = ImageIO.read(new ByteArrayInputStream(binary));
        if (img == null) {
            // ImageIO.read returns null when no registered reader understands the data.
            throw new IOException("captcha image bytes could not be decoded");
        }
        if (imgArray == null || imgArray.length < 2) {
            throw new IllegalArgumentException("imgArray must contain at least two slice origins");
        }

        List<BufferedImage> list = new ArrayList<>(imgArray.length);
        for (int[] origin : imgArray) {
            list.add(img.getSubimage(origin[0], origin[1], sliceWidth, sliceHeight));
        }

        int mid = list.size() >>> 1;
        BufferedImage mergeImageUp = mergeSlicesHorizontally(list, 0, mid);
        BufferedImage mergeImageDown = mergeSlicesHorizontally(list, mid, list.size());
        return mergeImage(mergeImageUp, mergeImageDown, false);
    }

    /**
     * Joins slices[from..to) left-to-right. Starts from the first slice, so a row that
     * holds a single slice yields that slice instead of null.
     */
    private static BufferedImage mergeSlicesHorizontally(List<BufferedImage> slices, int from, int to)
            throws IOException {
        BufferedImage row = slices.get(from);
        for (int i = from + 1; i < to; i++) {
            row = mergeImage(row, slices.get(i), true);
        }
        return row;
    }
     /**
      * Joins two images into a new {@code TYPE_INT_RGB} image.
      *
      * <p>Horizontal: {@code img2} is placed to the right of {@code img1}, both aligned to
      * the top. Vertical: {@code img2} is placed below {@code img1}, both aligned to the
      * left. The canvas is as large as the bigger of the two images along the shared
      * axis; area not covered by either image stays black.
      *
      * @param img1         left (horizontal) or upper (vertical) image
      * @param img2         right (horizontal) or lower (vertical) image
      * @param isHorizontal {@code true} to join side by side, {@code false} to stack
      * @return the merged image
      * @throws IOException never thrown here; kept so existing callers still compile
      */
     public static BufferedImage mergeImage(BufferedImage img1,
                BufferedImage img2, boolean isHorizontal) throws IOException {
            int w1 = img1.getWidth();
            int h1 = img1.getHeight();
            int w2 = img2.getWidth();
            int h2 = img2.getHeight();

            // Read both images row by row into packed RGB arrays.
            int[] firstPixels = img1.getRGB(0, 0, w1, h1, null, 0, w1);
            int[] secondPixels = img2.getRGB(0, 0, w2, h2, null, 0, w2);

            BufferedImage merged;
            if (isHorizontal) {
                // Height must fit the taller image, otherwise setRGB leaves the raster bounds.
                merged = new BufferedImage(w1 + w2, Math.max(h1, h2), BufferedImage.TYPE_INT_RGB);
                merged.setRGB(0, 0, w1, h1, firstPixels, 0, w1);   // left part
                merged.setRGB(w1, 0, w2, h2, secondPixels, 0, w2); // right part
            } else {
                // Width must fit the wider image for the same reason.
                merged = new BufferedImage(Math.max(w1, w2), h1 + h2, BufferedImage.TYPE_INT_RGB);
                merged.setRGB(0, 0, w1, h1, firstPixels, 0, w1);   // upper part
                merged.setRGB(0, h1, w2, h2, secondPixels, 0, w2); // lower part
            }
            return merged;
        }
    

2.使用selenium

后來我想着可能是我模擬鼠標這個動作哪里有問題,就又找到了selenium(2.42.2)。他也能操作htmlunit,關鍵是他的鼠標動作好像封裝得比較完全

但是我嘗試了以后發現了這個,HtmlUnitMouse這個動作沒有實現

 /**
  * Offset-based mouse move. This implementation does not support it and always throws.
  *
  * @param where   coordinates to move relative to (unused)
  * @param xOffset horizontal offset (unused)
  * @param yOffset vertical offset (unused)
  * @throws UnsupportedOperationException on every call
  */
 public void mouseMove(Coordinates where, long xOffset, long yOffset) {
    throw new UnsupportedOperationException("Moving to arbitrary X,Y coordinates not supported.");
  }

好吧,於是調用chrome吧

System.setProperty("webdriver.chrome.driver","C:\\chromedriver.exe");
        // Route the browser through the local HTTP proxy.
        Proxy proxy = new Proxy();
        proxy.setHttpProxy("127.0.0.1:8888");
        // Chrome is used on purpose; the alternative that was tried was
        // new HtmlUnitDriver(DesiredCapabilities.htmlUnitWithJs()).
        DesiredCapabilities capabilities = DesiredCapabilities.chrome();
        capabilities.setCapability(CapabilityType.PROXY, proxy);
        WebDriver driver = new ChromeDriver(capabilities);
        driver.get("http://www.qixin.com/login");
        driver.manage().timeouts().implicitlyWait(10, TimeUnit.SECONDS);
        checkPage(driver,"return $('.gt_cut_fullbg_slice');");
        // Print the page title.
        System.out.println("1 Page title is: " + driver.getTitle());
        String pageSource = driver.getPageSource();
        System.out.println(pageSource);
        org.openqa.selenium.JavascriptExecutor executor = (org.openqa.selenium.JavascriptExecutor)driver;
        // Constant-first comparison: executeScript may return null.
        boolean equals = "complete".equals(executor.executeScript("return document.readyState"));
        int moveX =99;// horizontal drag distance in pixels
        if (equals) {
            WebElement element = driver.findElement(By.className("gt_slider_knob"));
            Actions action = new Actions(driver);
            // dragAndDropBy takes offsets RELATIVE to the element, so pass the distance
            // itself (and no vertical movement), not the knob's page coordinates plus it.
            action.dragAndDropBy(element, moveX, 0).perform();
            pageSource = driver.getPageSource();
        }
        // Copy the browser's cookies, keeping each cookie's real "secure" flag.
        Set<org.openqa.selenium.Cookie> cookies = driver.manage().getCookies();
        Set<Cookie> cookies2 = new HashSet<>();
        for (org.openqa.selenium.Cookie cookie : cookies) {
            cookies2.add(new Cookie(cookie.getDomain(), cookie.getName(), cookie.getValue(), cookie.getPath(), cookie.getExpiry(), cookie.isSecure()));
        }
        for (Cookie cookie : cookies2) {
            // NOTE(review): the converted cookie is not stored anywhere yet; presumably it
            // should be handed to the HTTP client's cookie store - confirm against the caller.
            org.apache.http.cookie.Cookie httpClient = cookie.toHttpClient();
        }
        System.out.println(pageSource);

這樣提交的表單確實是有軌跡的,這里移動位置我先寫了個固定值,可以由上面圖片還原,以及一些開源的圖片識別工具識別出位置。以上應該就能解決這個滑動驗證碼了

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM