Java 中利用正則表達式獲取網頁圖片




import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads the images referenced by the {@code <img>} tags of a web page.
 *
 * <p>The page is fetched and decoded as text, {@code <img>} tags are located
 * with a regular expression, absolute http/https URLs are extracted from each
 * tag, and every URL is saved as a file under {@code SAVE_DIR}.
 *
 * @author swinglife
 */
public class pimg {

    /** Address of the page to scan. */
    private static final String PAGE_URL = "http://www.csdn.net";
    /** Charset used to decode the page body. */
    private static final String ENCODING = "UTF-8";
    /** Directory the images are written to (customise as needed). */
    private static final String SAVE_DIR = "C:/Users/tutu/Desktop/img/";
    /**
     * Regex matching one {@code <img ... src=... >} tag. {@code [^>]} keeps a
     * match inside a single tag, so neighbouring markup is never swallowed.
     */
    private static final String IMGURL_REG = "<img\\b[^>]*?src\\s*=[^>]*>";
    /**
     * Regex matching an absolute http/https URL inside a tag; the URL ends at
     * the first quote, whitespace or angle bracket.
     */
    private static final String IMGSRC_REG = "https?://[^\\s\"'<>]+";

    // Compiled once; Pattern instances are immutable and reusable.
    private static final Pattern IMG_TAG = Pattern.compile(IMGURL_REG);
    private static final Pattern IMG_SRC = Pattern.compile(IMGSRC_REG);

    /**
     * Fetches {@code PAGE_URL}, extracts its image URLs and downloads them.
     *
     * @param args unused
     * @throws Exception if the page itself cannot be fetched or decoded
     */
    public static void main(String[] args) throws Exception {
        pimg cm = new pimg();
        // Fetch the HTML text.
        String html = cm.getHTML(PAGE_URL);
        // Collect the <img> tags.
        List<String> imgTags = cm.getImageUrl(html);
        // Collect the image URLs found in those tags.
        List<String> imgSrc = cm.getImageSrc(imgTags);
        // Download the images.
        cm.Download(imgSrc);
    }

    /**
     * Fetches the content of a page as text.
     *
     * @param url address of the page
     * @return the response body decoded with {@code ENCODING}
     * @throws Exception if the URL is malformed or the page cannot be read
     */
    private String getHTML(String url) throws Exception {
        URLConnection connection = new URL(url).openConnection();
        InputStream in = connection.getInputStream();
        try {
            return readHtml(in);
        } finally {
            in.close();
        }
    }

    /**
     * Reads a stream to its end and decodes it with {@code ENCODING}.
     *
     * <p>All bytes are collected before decoding, so a multi-byte character
     * that straddles two reads is decoded correctly and no stale buffer
     * content ends up in the result.
     *
     * @param in stream to read; not closed by this method
     * @return the decoded text
     * @throws IOException if reading fails or the charset is unsupported
     */
    private static String readHtml(InputStream in) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        copy(in, bytes);
        return new String(bytes.toByteArray(), ENCODING);
    }

    /**
     * Copies every remaining byte of {@code in} to {@code out}.
     *
     * @param in  source stream; not closed by this method
     * @param out destination stream; not closed by this method
     * @throws IOException if reading or writing fails
     */
    private static void copy(InputStream in, OutputStream out) throws IOException {
        byte[] buf = new byte[1024];
        int length;
        while ((length = in.read(buf, 0, buf.length)) != -1) {
            // Write only the bytes actually read in this round.
            out.write(buf, 0, length);
        }
    }

    /**
     * Finds the {@code <img>} tags of a page.
     *
     * @param html page text
     * @return the text of every matching tag, in document order
     */
    private List<String> getImageUrl(String html) {
        Matcher matcher = IMG_TAG.matcher(html);
        List<String> listImgUrl = new ArrayList<String>();
        while (matcher.find()) {
            listImgUrl.add(matcher.group());
        }
        return listImgUrl;
    }

    /**
     * Extracts the absolute image URLs from a list of {@code <img>} tags.
     *
     * @param listImageUrl tag texts as returned by {@code getImageUrl}
     * @return every http/https URL found in the tags, in order
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> listImgSrc = new ArrayList<String>();
        for (String image : listImageUrl) {
            Matcher matcher = IMG_SRC.matcher(image);
            while (matcher.find()) {
                // The pattern excludes the delimiter, so no trimming is needed.
                listImgSrc.add(matcher.group());
            }
        }
        return listImgSrc;
    }

    /**
     * Downloads each image into {@code SAVE_DIR}.
     *
     * <p>A failure on one image is reported and the remaining images are still
     * attempted.
     *
     * @param listImgSrc absolute URLs of the images to download
     */
    private void Download(List<String> listImgSrc) {
        File dir = new File(SAVE_DIR);
        if (!dir.isDirectory() && !dir.mkdirs()) {
            System.out.println("下載失敗:無法創建目錄 " + dir);
            return;
        }
        for (String url : listImgSrc) {
            String imageName = fileNameOf(url);
            if (imageName.length() == 0 || imageName.equals(".") || imageName.equals("..")) {
                System.out.println("下載失敗:" + url + " (無法確定文件名)");
                continue;
            }
            try {
                System.out.println("開始下載:" + url);
                save(url, new File(dir, imageName));
                System.out.println(imageName + "下載完成");
            } catch (IOException e) {
                // Report and move on so one bad URL does not stop the batch.
                System.out.println("下載失敗:" + url + " (" + e + ")");
            }
        }
    }

    /**
     * Streams the content at {@code url} into {@code target}, closing both
     * streams even when the transfer fails.
     *
     * @param url    address of the image
     * @param target file to create or overwrite
     * @throws IOException if the URL is malformed or the transfer fails
     */
    private static void save(String url, File target) throws IOException {
        InputStream in = new URL(url).openStream();
        try {
            FileOutputStream out = new FileOutputStream(target);
            try {
                copy(in, out);
            } finally {
                out.close();
            }
        } finally {
            in.close();
        }
    }

    /**
     * Derives a local file name from an image URL: the text after the last
     * {@code '/'}, without query string or fragment, with the remaining
     * characters that Windows forbids in file names replaced by {@code '_'}.
     *
     * @param url image URL
     * @return the file name; empty when the URL ends with {@code '/'}
     */
    private static String fileNameOf(String url) {
        int end = url.length();
        int query = url.indexOf('?');
        if (query >= 0) {
            end = query;
        }
        int fragment = url.indexOf('#');
        if (fragment >= 0 && fragment < end) {
            end = fragment;
        }
        String path = url.substring(0, end);
        String name = path.substring(path.lastIndexOf('/') + 1);
        return name.replaceAll("[\\\\:*\"<>|]", "_");
    }

}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM