1 環境搭建:
1)下載
從鏈接:http://sourceforge.net/projects/htmlunit/files/htmlunit/
下載最新的bin文件
2)關於bin文件
裡面主要包含兩部分:一是lib目錄下的.jar文件,二是apidocs目錄下的幫助文件(即API說明文件,以網頁形式提供,打開index-all.html即可查看)
3)配置java的CLASSPATH(純手工方法)
將lib目錄下的所有.jar文件複製到任意目錄(如:c:\htmlunit\lib\)
然後右擊我的電腦->屬性->高級->環境變量->系統變量,對CLASSPATH進行編輯,如果沒有就新建一個(如果運行java或編譯時有錯誤,就先回到這裡檢查CLASSPATH的設置)
務必將所有.jar文件的詳細地址添加到CLASSPATH中,而不是用“c:\htmlunit\lib\”來代替,如.;c:\htmlunit\lib\1.jar;c:\htmlunit\lib\2.jar; 才是正確的寫法
務必每一個都寫清楚,需要注意最前面有個點".",最後面有個";"
2 解釋和說明:
1).jar其實就是把編譯好的.class文件打包在一起的壓縮包(ZIP格式),可以使用rar等解壓軟件打開。所以.jar本質上相當於一個被壓縮的目錄
2)官網的教程有些地方寫的很奇怪和不直觀,所以我做了些調整,主要是使輸出結果更加直觀
3)裡面每一個函數的具體使用方法在APIDOCS中已經有詳細的說明了,我這裡就不重複了
3 開始翻譯教程
3.1 獲取頁面的TITLE、XML代碼、文本
import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlPage; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.html.HtmlDivision; import com.gargoylesoftware.htmlunit.html.HtmlAnchor; import com.gargoylesoftware.htmlunit.*; import com.gargoylesoftware.htmlunit.WebClientOptions; import com.gargoylesoftware.htmlunit.html.HtmlInput; import com.gargoylesoftware.htmlunit.html.HtmlBody; import java.util.List; public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ String str; //創建一個webclient WebClient webClient = new WebClient(); //htmlunit 對css和javascript的支持不好,所以請關閉之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); //獲取頁面 HtmlPage page = webClient.getPage("http://www.baidu.com/"); //獲取頁面的TITLE str = page.getTitleText(); System.out.println(str); //獲取頁面的XML代碼 str = page.asXml(); System.out.println(str); //獲取頁面的文本 str = page.asText(); System.out.println(str); //關閉webclient webClient.closeAllWindows(); } }
3.2 使用不同版本的瀏覽器打開
import com.gargoylesoftware.htmlunit.WebClient; import com.gargoylesoftware.htmlunit.html.HtmlPage; import com.gargoylesoftware.htmlunit.BrowserVersion; import com.gargoylesoftware.htmlunit.html.HtmlDivision; import com.gargoylesoftware.htmlunit.html.HtmlAnchor; import com.gargoylesoftware.htmlunit.*; import com.gargoylesoftware.htmlunit.WebClientOptions; import com.gargoylesoftware.htmlunit.html.HtmlInput; import com.gargoylesoftware.htmlunit.html.HtmlBody; import java.util.List; public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ String str; //使用FireFox讀取網頁 WebClient webClient = new WebClient(BrowserVersion.FIREFOX_24); //htmlunit 對css和javascript的支持不好,所以請關閉之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = webClient.getPage("http://www.baidu.com/"); str = page.getTitleText(); System.out.println(str); //關閉webclient webClient.closeAllWindows(); } }
3.3 找到頁面中特定的元素
public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ //創建webclient WebClient webClient = new WebClient(BrowserVersion.CHROME); //htmlunit 對css和javascript的支持不好,所以請關閉之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/"); //通過id獲得"百度一下"按鈕 HtmlInput btn = (HtmlInput)page.getHtmlElementById("su"); System.out.println(btn.getDefaultValue()); //關閉webclient webClient.closeAllWindows(); } }
3.4 元素檢索
public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ //創建webclient WebClient webClient = new WebClient(BrowserVersion.CHROME); //htmlunit 對css和javascript的支持不好,所以請關閉之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/"); //查找所有div List<?> hbList = page.getByXPath("//div"); HtmlDivision hb = (HtmlDivision)hbList.get(0); System.out.println(hb.toString()); //查找並獲取特定input List<?> inputList = page.getByXPath("//input[@id='su']"); HtmlInput input = (HtmlInput)inputList.get(0); System.out.println(input.toString()); //關閉webclient webClient.closeAllWindows(); } }
3.5 提交搜索
public class helloHtmlUnit{ public static void main(String[] args) throws Exception{ //創建webclient WebClient webClient = new WebClient(BrowserVersion.CHROME); //htmlunit 對css和javascript的支持不好,所以請關閉之 webClient.getOptions().setJavaScriptEnabled(false); webClient.getOptions().setCssEnabled(false); HtmlPage page = (HtmlPage)webClient.getPage("http://www.baidu.com/"); //獲取搜索輸入框並提交搜索內容 HtmlInput input = (HtmlInput)page.getHtmlElementById("kw"); System.out.println(input.toString()); input.setValueAttribute("雅蠛蝶"); System.out.println(input.toString()); //獲取搜索按鈕並點擊 HtmlInput btn = (HtmlInput)page.getHtmlElementById("su"); HtmlPage page2 = btn.click(); //輸出新頁面的文本 System.out.println(page2.asText()); } }