使用htmlunit的好處有兩點:其一,相比httpclient,htmlunit是對瀏覽器的模擬,比如你定位一個按鈕,就可以直接執行click()方法;其二,不需要像在httpclient中一樣編寫複雜的代碼,如一堆request header還有一大堆請求參數,你只需要填寫用戶名,密碼,驗證碼即可,就像在使用一個沒有界面的瀏覽器。當然更重要的是,htmlunit對js的支持設置極其簡單。
1.添加maven的htmlunit(標紅)依賴
<!-- Maven dependencies used by this tutorial. -->
<dependencies>
    <!-- Test-scoped JUnit; the version comes from the junit.version property. -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>${junit.version}</version>
        <scope>test</scope>
    </dependency>
    <dependency>
        <groupId>com.alibaba</groupId>
        <artifactId>fastjson</artifactId>
        <version>1.2.47</version>
    </dependency>
    <dependency>
        <groupId>org.jsoup</groupId>
        <artifactId>jsoup</artifactId>
        <version>1.11.3</version>
    </dependency>
    <!-- HtmlUnit: the dependency this step of the tutorial adds. -->
    <dependency>
        <groupId>net.sourceforge.htmlunit</groupId>
        <artifactId>htmlunit</artifactId>
        <version>2.18</version>
    </dependency>
</dependencies>
2.思路
定位用戶名,密碼,驗證碼框等元素,填寫即可。驗證碼可以先把圖片下載下來然後手動輸入,也可以使用tess4j進行圖片識別,這裡採用的是手動輸入。測試的網站是一個偽ajax submit,測試多次發現需要二次輸入驗證碼才能正確登錄,但令人疑惑的是兩次生成的驗證碼一樣(如果不一樣,說明你第一次輸入錯誤)。
1 public static void main(String[] args) throws Exception { 2 WebClient webClient = new WebClient(BrowserVersion.CHROME); 3 webClient.getOptions().setJavaScriptEnabled(true); 4 webClient.getOptions().setCssEnabled(true); 5 webClient.getOptions().setThrowExceptionOnScriptError(false); 6 // webClient.getOptions().setThrowExceptionOnFailingStatusCode(true); 7 webClient.getOptions().setActiveXNative(false); 8 9 10 //ajax 11 webClient.setAjaxController(new NicelyResynchronizingAjaxController()); 12 webClient.getOptions().setUseInsecureSSL(false); 13 14 15 //允許重定向 16 webClient.getOptions().setRedirectEnabled(true); 17 18 19 //連接超時 20 webClient.getOptions().setTimeout(5000); 21 22 //js執行超時 23 webClient.setJavaScriptTimeout(10000*3); 24 25 //對於此網站務必開啟 26 webClient.getCookieManager().setCookiesEnabled(true); 27 28 String url = "https://www.zuhaowan.com/login/"; 29 HtmlPage page = webClient.getPage(url); 30 webClient.waitForBackgroundJavaScript(5000); 31 32 33 34 35 HtmlPage newPage = readyPage(page, webClient); 36 // String content1 = newPage.asXml(); 37 // IOUtils.write(content1.getBytes(),new FileWriter(new File("f:/content1.txt"))); 38 39 //如果頁面url沒有變化重新進行一次登錄 40 if(newPage.getUrl().toString().equals(url)) { 41 System.out.println("出現錯誤請重新登錄-------------"); 42 HtmlPage result = readyPage(newPage,webClient); 43 System.out.println("url----------------"+result.getUrl()); 44 System.out.println("頁面----" + result.asXml()); 45 // IOUtils.write(result.asXml(),new FileWriter(new File("f:/content2.txt"))); 46 47 } 48 49 webClient.close(); 50 } 51 52 53 54 public static HtmlPage readyPage(HtmlPage page,WebClient webClient) throws Exception { 55 //封裝頁面元素 56 HtmlForm form = page.getHtmlElementById("form2"); 57 HtmlTextInput loginname = form.getInputByName("loginname"); 58 loginname.setValueAttribute("用戶名"); 59 HtmlPasswordInput loginpwd = form.getInputByName("loginpwd"); 60 loginpwd.setValueAttribute("密碼"); 61 62 //驗證碼輸入框 63 HtmlTextInput verify_code = 
form.getInputByName("verify_code"); 64 65 //驗證碼圖片 66 HtmlImage verify_img = (HtmlImage) page.getElementById("verify_img"); 67 UUID randomUUID = UUID.randomUUID(); 68 //保存 69 verify_img.saveAs(new File("./src/main/resources/image/verifyimg"+ randomUUID.toString() +".png")); 70 71 System.out.println("驗證碼圖片已保存!"); 72 System.out.println("請輸入驗證碼"); 73 //手動輸入驗證碼 74 Scanner scanner = new Scanner(System.in); 75 String code = scanner.nextLine(); 76 System.out.println("驗證碼-------------" + code); 77 verify_code.setValueAttribute(code); 78 79 80 //登錄按鈕也可以使用page.executeJavaScript("javascript:document.getElementById('loginsubmit').click()").getNewPage(); 81 HtmlAnchor login = page.getHtmlElementById("loginsubmit"); 82 HtmlPage newPage = login.click(); 83 84 //等待js加載 85 webClient.waitForBackgroundJavaScript(5000); 86 return newPage; 87 88 } 89
3.控制台部分輸出截圖