/**
 * Small image scraper: fetches one search-result page, extracts every image link found in
 * its HTML with a regular expression, and downloads the images concurrently into a local
 * directory using a fixed-size thread pool.
 *
 * <p>Relies on Apache Commons IO ({@code IOUtils}, {@code FileUtils}, {@code FilenameUtils}).
 */
public class Demo {

    /** Address of the page whose image links are scraped. */
    private static final String PAGE_URL =
            "https://image.baidu.com/search/index?tn=baiduimage&ct=201326592&lm=-1&cl=2&ie=gbk&word=%B6%AB%B7%BD%BD%F0%B9%DD%B3%A4&fr=ala&ala=1&alatpl=adress&pos=0&hs=2&xthttps=111111";

    /** Charset used to decode the page; presumably chosen to match {@code ie=gbk} in the URL. */
    private static final String PAGE_CHARSET = "GBK";

    /** Directory the downloaded images are written to. */
    private static final String DOWNLOAD_DIR = "F:\\pic";

    /** Upper bound on the number of concurrent download threads. */
    private static final int MAX_DOWNLOAD_THREADS = 50;

    /** Referer sent when the caller does not supply one. */
    private static final String DEFAULT_REFERER = "http://www.baidu.com/";

    /** User-Agent header value imitating a desktop browser. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36";

    /** Connect timeout; without one a stalled server blocks a pool thread forever. */
    private static final int CONNECT_TIMEOUT_MILLIS = 10_000;

    /** Read timeout; without one a stalled transfer blocks a pool thread forever. */
    private static final int READ_TIMEOUT_MILLIS = 30_000;

    /**
     * A double quote followed by one or more characters other than
     * {@code " ^ ( ) } > < {}, ending in an image extension. Compiled once and reused.
     */
    private static final Pattern IMAGE_LINK =
            Pattern.compile("\"[^\"^(^)^}^>^<^{]+\\.(jpg|png|jpeg|gif)");

    /**
     * An image extension followed by {@code _suffix} inside the last path segment,
     * e.g. the {@code _b.jpg} in {@code ..._032207.jpg_b.jpg}.
     */
    private static final Pattern THUMBNAIL_SUFFIX =
            Pattern.compile("(\\.(?:jpg|png|jpeg|gif))_[^/]*$");

    /**
     * Fetches the page, extracts its image links and downloads each of them.
     *
     * @param args unused
     * @throws IOException if the page itself cannot be fetched or read
     */
    public static void main(String[] args) throws IOException {
        // Read the whole page as text; the stream is closed even if decoding fails.
        String html;
        try (InputStream is = getConnection(PAGE_URL, null).getInputStream()) {
            html = IOUtils.toString(is, PAGE_CHARSET);
        }

        // Parse the HTML and collect every image link.
        List<String> picPaths = parseHtml(html);
        if (picPaths.isEmpty()) {
            return;
        }

        // One thread per image, capped at MAX_DOWNLOAD_THREADS.
        ExecutorService es =
                Executors.newFixedThreadPool(Math.min(picPaths.size(), MAX_DOWNLOAD_THREADS));
        try {
            for (final String picPath : picPaths) {
                // The scraped page is sent as referer to get past hotlink protection.
                es.execute(() -> downLoad(pathHandle(picPath), DOWNLOAD_DIR, PAGE_URL));
            }
        } finally {
            // Already-submitted tasks still run; the pool just stops accepting new ones.
            es.shutdown();
        }
    }

    /**
     * Generates a unique identifier used as a file name.
     *
     * @return a random UUID with the dashes removed
     */
    private static String getUUID() {
        return UUID.randomUUID().toString().replace("-", "");
    }

    /**
     * Normalizes an image link extracted from the page.
     *
     * <p>Links that do not start with {@code http} get {@code http:} prepended (this suits
     * protocol-relative {@code //host/path} links; other relative forms are not resolved).
     * A thumbnail suffix of the form {@code .jpg_b.jpg} is cut back to the first extension
     * so that the full-size image is requested, e.g.
     * {@code http://img.alicdn.com/bao/uploaded/i4/TB19FGse7KWBuNjy1zjefkOypXa_032207.jpg_b.jpg}
     * becomes {@code ..._032207.jpg}. Underscores elsewhere in the link are left alone.
     *
     * @param picPath raw link as found in the HTML
     * @return the link to download
     */
    private static String pathHandle(String picPath) {
        if (!picPath.startsWith("http")) {
            picPath = "http:" + picPath;
        }
        Matcher thumbnail = THUMBNAIL_SUFFIX.matcher(picPath);
        if (thumbnail.find()) {
            // Keep everything up to and including the first image extension.
            picPath = picPath.substring(0, thumbnail.end(1));
        }
        return picPath;
    }

    /**
     * Downloads one image into {@code dir} under a freshly generated file name that keeps
     * the link's extension. Failures are reported on standard error and do not propagate,
     * so one bad link does not affect the other downloads.
     *
     * @param picPath address of the image
     * @param dir     target directory
     * @param referer value for the referer request header
     */
    private static void downLoad(String picPath, String dir, String referer) {
        try {
            // Build the target file name.
            String name = getUUID() + "." + FilenameUtils.getExtension(picPath);
            File target = new File(new File(dir), name);
            // copyToFile leaves the source stream open, so close it here.
            try (InputStream in = getConnection(picPath, referer).getInputStream()) {
                FileUtils.copyToFile(in, target);
            }
            System.out.println(picPath + "下載完畢!");
        } catch (IOException | RuntimeException e) {
            // Include the cause so a failed download can be diagnosed.
            System.err.println(picPath + "下載失敗!" + " " + e);
        }
    }

    /**
     * Extracts image links from HTML using {@link #IMAGE_LINK}.
     *
     * @param html page content
     * @return every match, in order of appearance, without its leading double quote
     */
    private static List<String> parseHtml(String html) {
        List<String> list = new ArrayList<>();
        Matcher m = IMAGE_LINK.matcher(html);
        while (m.find()) {
            // Drop the opening double quote that is part of the match.
            list.add(m.group().substring(1));
        }
        return list;
    }

    /**
     * Opens a connection to {@code url} with a referer header (to get past hotlink
     * protection) and a browser-like user-agent header, plus connect/read timeouts.
     *
     * @param url     address to connect to
     * @param referer referer header value, or {@code null} to use {@link #DEFAULT_REFERER}
     * @return the configured, not yet connected, connection; never {@code null}
     * @throws IOException if the address is malformed or the connection cannot be opened
     */
    private static URLConnection getConnection(String url, String referer) throws IOException {
        URLConnection uc = new URL(url).openConnection();
        // Work around hotlink protection.
        uc.setRequestProperty("referer", referer == null ? DEFAULT_REFERER : referer);
        // Imitate a browser.
        uc.setRequestProperty("user-agent", USER_AGENT);
        uc.setConnectTimeout(CONNECT_TIMEOUT_MILLIS);
        uc.setReadTimeout(READ_TIMEOUT_MILLIS);
        return uc;
    }
}