Jsoup獲取DOM元素


(1)doc.getElementsByTag(String tagName);

(2)doc.getElementById(String id);

(3)doc.getElementsByClass(String className);

(4)doc.getElementsByAttribute(String key);

 

// look up the elements returned for the attribute key "width" and print each one
elements=document.getElementsByAttribute("width");
for(int i=0;i<elements.size();i++){
	Element current=elements.get(i);
	System.out.println(current.toString());
}

  

(5)doc.getElementsByAttributeValue(String key,String value);

 

示例:通過key-value查找src="/images/logo_small.gif"的元素

		// query the DOM by attribute key/value (here: src="/images/logo_small.gif")
		elements=document.getElementsByAttributeValue("src", "/images/logo_small.gif");
		// get(0) would throw IndexOutOfBoundsException when nothing matched, so check first
		if(!elements.isEmpty()){
			System.out.println(elements.get(0).toString());
		}

示例:通過key-value查找target="_blank"的元素

		// query by the key/value pair target="_blank" and print every result
		elements=document.getElementsByAttributeValue("target","_blank");
		for(Element blankTarget:elements){
			String rendered=blankTarget.toString();
			System.out.println(rendered);
		}

  

 使用document.select();選擇元素

通過class一級一級往下找

package com.oracle.zibo;

import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * Demo: downloads a web page with Apache HttpClient, parses it with jsoup and
 * prints the text of the elements matched by a descendant CSS selector.
 */
public class Demo2 {

	/**
	 * Requests http://www.bootcss.com/, decodes the response body as UTF-8, parses it
	 * and prints the text of every element matching
	 * ".row .thumbnail .caption h3 a" to standard output.
	 *
	 * @param args command-line arguments (not used)
	 * @throws Exception if executing the request, reading the body or closing the
	 *                   HTTP resources fails
	 */
	public static void main(String[] args) throws Exception {
		HttpGet httpGet=new HttpGet("http://www.bootcss.com/");

		String str;
		// try-with-resources closes the response first and then the client, even when
		// execute() or EntityUtils.toString() throws; the original leaked both in that case
		try(CloseableHttpClient closeableHttpClient=HttpClients.createDefault();
				CloseableHttpResponse closeableHttpResponse=closeableHttpClient.execute(httpGet)){
			HttpEntity httpEntity=closeableHttpResponse.getEntity(); // response entity, i.e. the page content
			str=EntityUtils.toString(httpEntity, "utf-8");
		}

		Document document=Jsoup.parse(str); // parse the page

		// find all the titles on the bootstrap home page
		Elements elements=document.select(".row .thumbnail .caption h3 a");
		for(Element e:elements){
			System.out.println(e.text());
		}
	}

}

  

使用"a[href]"

查找所有帶href屬性的a標籤

		// select with a[href]
		Elements links=document.select("a[href]");
		for(Element link:links){
			String innerHtml=link.html();
			System.out.println(innerHtml);
		}

使用"img[src$=.png]"

查找擴展名為.png的圖片的元素

		// select with img[src$=.png] and print each match
		Elements pngImages=document.select("img[src$=.png]");
		for(int i=0;i<pngImages.size();i++){
			System.out.println(pngImages.get(i).toString());
		}

  

取得我們需要的信息

		// for every match print its string form, its text, its inner HTML and its src value
		Elements pngImages=document.select("img[src$=.png]");
		for(Element image:pngImages){
			String markup=image.toString();
			System.out.println(markup);
			String textContent=image.text(); // the content of the tag
			System.out.println(textContent);
			String innerHtml=image.html(); // the html code inside the tag
			System.out.println(innerHtml);
			String source=image.attr("src"); // the value of the given attribute
			System.out.println(source);
		}

e.attr(屬性),返回屬性值

.first()取得第一個

.last()取得最後一個

// first() gives null when the selection is empty, so guard before reading the attribute
Element element=document.select("img[src$=.gif]").first();
if(element!=null){
	System.out.println(element.attr("src")); // the value of the given attribute
}

  

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM