(1)doc.getElementsByTag(String tagName);
(2)doc.getElementById(String id);
(3)doc.getElementsByClass(String className);
(4)doc.getElementsByAttribute(String key);
// Select every element that carries a "width" attribute and print each match.
elements = document.getElementsByAttribute("width");
for (int i = 0; i < elements.size(); i++) {
    Element withWidth = elements.get(i);
    System.out.println(withWidth.toString());
}
(5)doc.getElementsByAttributeValue(String key,String value);
示例:通過key-value查找src="/images/logo_small.gif"的元素
// Query the DOM by attribute key/value (here: src="/images/logo_small.gif").
elements = document.getElementsByAttributeValue("src", "/images/logo_small.gif");
// get(0) throws IndexOutOfBoundsException when nothing matched, so check first.
if (!elements.isEmpty()) {
    System.out.println(elements.get(0).toString());
}
示例:通過key-value查找target="_blank"的元素
// Select every element whose target attribute equals "_blank" and print each match.
elements = document.getElementsByAttributeValue("target", "_blank");
for (Element blankTarget : elements) {
    System.out.println(blankTarget.toString());
}
使用document.select();選擇元素
通過class一級一級往下找
package com.oracle.zibo;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Demo: downloads http://www.bootcss.com/ with Apache HttpClient, parses the HTML with
 * jsoup, and prints the text of every link matched by a descendant CSS selector.
 */
public class Demo2 {
    /**
     * Fetches the page, parses it, and prints the text of each matched link to standard output.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if executing the request or reading the response body fails
     */
    public static void main(String[] args) throws Exception {
        String str;
        // try-with-resources closes the response and then the client, even when
        // execute() or EntityUtils.toString() throws.
        try (CloseableHttpClient closeableHttpClient = HttpClients.createDefault();
             CloseableHttpResponse closeableHttpResponse =
                     closeableHttpClient.execute(new HttpGet("http://www.bootcss.com/"))) {
            HttpEntity httpEntity = closeableHttpResponse.getEntity(); // response entity (page content)
            str = EntityUtils.toString(httpEntity, "utf-8");
        }
        Document document = Jsoup.parse(str); // parse the page
        // find all titles on the Bootstrap home page
        Elements elements = document.select(".row .thumbnail .caption h3 a");
        for (Element e : elements) {
            System.out.println(e.text());
        }
    }
}
使用"a[href]"
查找所有帶href屬性的a標簽
// Select a[href]: every <a> element that has an href attribute; print the html() of each.
Elements links = document.select("a[href]");
for (Element link : links) {
    System.out.println(link.html());
}
使用"img[src$=.png]"
查找擴展名為.png的圖片的元素
// Select <img> elements whose src attribute ends with ".png" and print each match.
Elements pngImages = document.select("img[src$=.png]");
for (int i = 0; i < pngImages.size(); i++) {
    System.out.println(pngImages.get(i).toString());
}
取得我們需要的信息
// For each <img> whose src ends with ".png", print several views of the element.
Elements pngImages = document.select("img[src$=.png]");
for (Element img : pngImages) {
    System.out.println(img.toString());
    System.out.println(img.text());      // content inside the tag
    System.out.println(img.html());      // HTML code inside the tag
    System.out.println(img.attr("src")); // value of the given attribute
}
e.attr(屬性),返回屬性值
.first()取得第一個
.last()取得最後一個
Element element = document.select("img[src$=.gif]").first();
// first() returns null when the selector matches nothing; guard to avoid a NullPointerException.
if (element != null) {
    System.out.println(element.attr("src")); // value of the given attribute
}
