Java 捕獲一個網站頁面的全部圖片


直接上代碼:

package com.jeecg.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Downloads every image referenced by the {@code <img>} tags of a single web page.
 *
 * <p>The pipeline is: fetch the page HTML, extract the {@code <img>} tags, extract the
 * absolute image URLs from those tags, then download each URL into {@link #OUTPUT_DIR}
 * as {@code <index><extension>}.
 */
public class CatchImage {
    /** Page whose images are downloaded. */
    private static final String TARGET_URL = "http://news.163.com/";
    /** Character encoding used to decode the page. */
    private static final String ENCODING = "UTF-8";
    /** Directory the downloaded images are written to. */
    private static final String OUTPUT_DIR = "D:/imgPage/";
    /** Connect and read timeout, in milliseconds, for every HTTP request. */
    private static final int TIMEOUT_MS = 2000;
    /** Size of the copy buffer used while downloading. */
    private static final int BUFFER_SIZE = 1024;

    /** Matches a whole {@code <img ...>} tag regardless of attribute order or letter case. */
    private static final Pattern IMG_TAG_PATTERN =
            Pattern.compile("<img\\b[^>]*>", Pattern.CASE_INSENSITIVE);
    /**
     * Matches an absolute http/https image URL that is immediately followed by a closing quote.
     * The URL body excludes quotes and whitespace so a match can never span two attributes.
     */
    private static final Pattern IMG_SRC_PATTERN = Pattern.compile(
            "https?://[^\"'\\s<>]+?\\.(?:jpeg|jpg|png|gif)(?=[\"'])", Pattern.CASE_INSENSITIVE);

    /**
     * Fetches {@link #TARGET_URL} and downloads all images found on it.
     *
     * @param args unused
     * @throws Exception if the page or any image cannot be fetched or written
     */
    public static void main(String[] args) throws Exception {
        CatchImage cm = new CatchImage();

        // Fetch the HTML text of the page.
        String html = cm.getHTML(TARGET_URL);

        // Extract the <img> tags.
        List<String> imgTags = cm.getImageUrl(html);

        // Extract the image URLs from the tags.
        List<String> imgSrc = cm.getImageSrc(imgTags);

        // Download the images.
        cm.download(imgSrc);
    }

    /**
     * Fetches the HTML content of a page.
     *
     * @param oldLink address of the page to fetch
     * @return the page text with lines separated by {@code '\n'}, or an empty string when the
     *         server does not answer with HTTP 200
     * @throws Exception if the URL is malformed or the request fails
     */
    private String getHTML(String oldLink) throws Exception {
        StringBuilder sb = new StringBuilder();
        HttpURLConnection connection = (HttpURLConnection) new URL(oldLink).openConnection();
        try {
            connection.setRequestMethod("GET");
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);
            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return "";
            }
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(connection.getInputStream(), ENCODING));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Keep the line break so tokens split across lines are not fused together.
                    sb.append(line).append('\n');
                }
            } finally {
                reader.close();
            }
        } finally {
            connection.disconnect();
        }
        return sb.toString();
    }

    /**
     * Extracts every {@code <img>} tag from the given HTML.
     *
     * @param html HTML text to scan
     * @return the matched tags in document order; empty when there are none
     */
    private List<String> getImageUrl(String html) {
        List<String> listImgUrl = new ArrayList<String>();
        Matcher matcher = IMG_TAG_PATTERN.matcher(html);
        while (matcher.find()) {
            listImgUrl.add(matcher.group());
        }
        return listImgUrl;
    }

    /**
     * Extracts the absolute image URLs contained in the given {@code <img>} tags.
     *
     * @param listImageUrl {@code <img>} tags as returned by {@link #getImageUrl(String)}
     * @return every http/https URL ending in .jpeg, .jpg, .png or .gif, in encounter order
     */
    private List<String> getImageSrc(List<String> listImageUrl) {
        List<String> listImgSrc = new ArrayList<String>();
        for (String image : listImageUrl) {
            Matcher matcher = IMG_SRC_PATTERN.matcher(image);
            while (matcher.find()) {
                // The closing quote is only a lookahead, so the match is already the bare URL.
                listImgSrc.add(matcher.group());
            }
        }
        return listImgSrc;
    }

    /**
     * Downloads each URL into {@link #OUTPUT_DIR}, naming the files {@code 0.jpg}, {@code 1.png}, ...
     *
     * @param listImgSrc image URLs to download
     * @throws FileNotFoundException if the output directory or a target file cannot be created
     * @throws Exception if a URL is malformed or a transfer fails
     */
    private void download(List<String> listImgSrc) throws Exception {
        File outputDir = new File(OUTPUT_DIR);
        if (!outputDir.isDirectory() && !outputDir.mkdirs()) {
            throw new FileNotFoundException("Cannot create output directory: " + outputDir);
        }

        int count = 0;
        byte[] buf = new byte[BUFFER_SIZE];
        for (String url : listImgSrc) {
            System.out.println(url);
            System.out.println("開始下載:" + url);

            URLConnection connection = new URL(url).openConnection();
            // Bound the wait so one unresponsive host cannot stall the whole run.
            connection.setConnectTimeout(TIMEOUT_MS);
            connection.setReadTimeout(TIMEOUT_MS);

            InputStream in = connection.getInputStream();
            try {
                // The extension comes from this URL itself, so it always matches the file.
                FileOutputStream fo =
                        new FileOutputStream(new File(outputDir, count + extensionOf(url)));
                try {
                    int length;
                    while ((length = in.read(buf, 0, buf.length)) != -1) {
                        fo.write(buf, 0, length);
                    }
                } finally {
                    fo.close();
                }
            } finally {
                in.close();
            }
            System.out.println("下載完成");
            count++;
        }
        System.out.println(count);
    }

    /**
     * Returns the file extension of a URL, including the leading dot.
     *
     * @param url URL to inspect
     * @return the text from the last {@code '.'} of the final path segment, or an empty string
     *         when that segment contains no dot
     */
    private static String extensionOf(String url) {
        int dot = url.lastIndexOf('.');
        // A dot before the last slash belongs to the host or a directory, not the file name.
        if (dot < 0 || dot < url.lastIndexOf('/')) {
            return "";
        }
        return url.substring(dot);
    }
}

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM