(1)doc.getElementsByTag(String tagName);
(2)doc.getElementById(String id);
(3)doc.getElementsByClass(String className);
(4)doc.getElementsByAttribute(String key);
// Collect every element that carries a "width" attribute and print each one.
elements = document.getElementsByAttribute("width");
for (int i = 0; i < elements.size(); i++) {
    Element current = elements.get(i);
    System.out.println(current.toString());
}
(5)doc.getElementsByAttributeValue(String key,String value);
示例:通過key-value查找src="/images/logo_small.gif"的元素
//根據key-value名稱來查詢DOM(查找src="") elements=document.getElementsByAttributeValue("src", "/images/logo_small.gif"); System.out.println(elements.get(0).toString());
示例:通過key-value查找target="_blank"的元素
// Print every element whose "target" attribute equals "_blank".
elements = document.getElementsByAttributeValue("target", "_blank");
for (Element match : elements) {
    String rendered = match.toString();
    System.out.println(rendered);
}
使用document.select();選擇元素
通過class一級一級往下找
package com.oracle.zibo; import org.apache.http.HttpEntity; import org.apache.http.HttpHost; import org.apache.http.client.config.RequestConfig; import org.apache.http.client.methods.CloseableHttpResponse; import org.apache.http.client.methods.HttpGet; import org.apache.http.impl.client.CloseableHttpClient; import org.apache.http.impl.client.HttpClients; import org.apache.http.util.EntityUtils; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; public class Demo2 { public static void main(String[] args) throws Exception { CloseableHttpClient closeableHttpClient=HttpClients.createDefault(); HttpGet httpGet=new HttpGet("http://www.bootcss.com/"); CloseableHttpResponse closeableHttpResponse=closeableHttpClient.execute(httpGet); HttpEntity httpEntity=closeableHttpResponse.getEntity(); //獲取實體、網頁內容 String str=EntityUtils.toString(httpEntity, "utf-8"); closeableHttpResponse.close(); closeableHttpClient.close(); Document document=Jsoup.parse(str); //解析網頁 //查找bootstrap主頁下的所有標題 Elements elements=document.select(".row .thumbnail .caption h3 a"); for(Element e:elements){ System.out.println(e.text()); } } }
使用"a[href]"
查找所有帶href屬性的a標籤
//查找a[href] Elements elements=document.select("a[href]"); for(Element e:elements){ System.out.println(e.html()); }
使用"img[src$=.png]"
查找擴展名為.png的圖片的元素
// Attribute-suffix selector: <img> elements whose src ends with ".png".
Elements elements = document.select("img[src$=.png]");
int matchCount = elements.size();
for (int i = 0; i < matchCount; i++) {
    System.out.println(elements.get(i).toString());
}
取得我們需要的信息
// For each <img> whose src ends with ".png", print the pieces of information
// that can be read from the element.
Elements elements = document.select("img[src$=.png]");
for (Element e : elements) {
    System.out.println(e.toString());
    System.out.println(e.text());       // the content inside the tag
    System.out.println(e.html());       // the HTML code inside the tag
    System.out.println(e.attr("src"));  // the value of the given attribute
}
e.attr(屬性),返回屬性值
.first()取得第一個
.last()取得最後一個
// first() returns null when the selection is empty, so guard before
// dereferencing to avoid a NullPointerException on pages without a .gif image.
Element element = document.select("img[src$=.gif]").first();
if (element != null) {
    System.out.println(element.attr("src")); // the value of the given attribute
}