C#抓取網絡上的圖片


最近在學 Python,學了簡單的從網上抓取圖片的方法,剛好做一個 C# 版本。

下面貼代碼:

using System;
using System.IO;
using System.Collections.Generic;
using static System.Console;
using System.Text;
using System.Net;
using System.Text.RegularExpressions;

namespace Gaofajin.Net
{
 
    public class ImageReptiles
    {
        public string path { get; set; } = "Images";//圖片主目錄
        List<ImgInfo> urls = new List<ImgInfo>();存儲用戶定義的Url列表
        public delegate void UrlOverEventHandler(string msg);//處理完成
        public event UrlOverEventHandler urlOver;
        public delegate void OnErrorEventHandler(string errmsg);//發送錯誤
        public event OnErrorEventHandler onError;
        public struct ImgInfo//url及圖片存儲子目錄
        {
            public string Path { get; set; }
            public string Url { get; set; }
        }
        public List<ImgInfo> Urls{ get { return urls; } }
        public void AddUrl(string url,string path)//添加url
        {
            urls.Add(new ImgInfo() { Url = url,Path=path });
        }
        public void AddUrl(string url)
        {
            urls.Add(new ImgInfo() { Url = url});
        }
        public void  StartGetImage()//調用此方法開始抓取圖片
        {
            if (urls?.Count <= 0)
            {
                onError?.Invoke($"傳入Url集合為空,請調用{nameof(AddUrl)}方法傳入url地址!");
            }
            urlOver?.Invoke("開始抓取圖片,請稍后..........");
            foreach (ImgInfo url in urls)
            {
                string html = GetHtml(url.Url);
                List<string> list = GetImgUrlList(html);
               urlOver?.Invoke($"url:{url.Url}"+SaveImg(list,url.Path));
            }
            urlOver?.Invoke("全部操作完成!");
        }
        string GetHtml(string uri)//請求指定url取得返回html數據
        {
            Stream rsp = null;
            StreamReader sr=null;
            try
            {
                WebRequest http = WebRequest.Create(uri);
                rsp = http.GetResponse().GetResponseStream();
                sr = new StreamReader(rsp, Encoding.UTF8);
                return "成功:" + sr.ReadToEnd();
            }
            catch (Exception ex)
            {
                return "失敗:" + ex.Message;
            }
            finally
            {
                sr?.Close();
                rsp?.Close();
            }
        }
        List<string> GetImgUrlList(string html)//從返回html數據中分析提取圖片地址
        {
            if (html?.Substring(0, 2) != "成功")
            {
                return null;
            }
            List<string> list = new List<string>();

            MatchCollection mc = Regex.Matches(html, @"[A-Za-z]{4,5}://[^?!\s]*\.jpg", RegexOptions.Multiline);
            foreach (Match m in mc)
            {
                list.Add(m.Groups[0].Value);
            }
            return list;
        }
        String SaveImg(List<string> list,string subpath)//保存圖片到本地
        {
            if (list?.Count <= 0)
            {
                return "未解析到圖片地址!";
            }
            string dic = path + "\\" + subpath;
            //檢查存儲路徑
            if (!Directory.Exists(dic))
            {
                Directory.CreateDirectory(dic);
            }
            int s = 0, f = 0;
            string msg = "一共抓到{0}個圖片地址,成功下載{1}張圖片,下載失敗{2}張,圖片保存路徑{3}";
            foreach (string url in list)
            {
                //取文件名
                string name = url.Substring(url.LastIndexOf('/') + 1, url.Length - url.LastIndexOf('/') -5);
                WebClient wc = new WebClient();
                try
                {
                    wc.DownloadFile(url, dic+"\\"+ name+".jpg");
                    s++;
                    urlOver?.Invoke($"從{url}抓取圖片{ name + ".jpg"}成功!");
                }
                catch
                {
                    f++;
                    urlOver?.Invoke($"從{url}抓取圖片{name+".jpg"}失敗!");

                }
                finally { wc.Dispose(); }
            }
            return string.Format(msg, list.Count, s, f, dic);
        }
    }
}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM