C# 爬取圖片


網絡收集整理  爬取圖片 

需先引用 AngleSharp NuGet 包：

using AngleSharp;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;

namespace CoreConsoleApp
{
    public class Program
    {
        /// <summary>
        /// Crawls listing pages 1 through 49 of the target site, and for each page
        /// saves every image found under ".panel-body li" into
        /// D:\Image\&lt;date&gt;\&lt;page number - page title&gt;\.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args)
        {
            // Configure AngleSharp with the default loader so documents can be fetched.
            var config = Configuration.Default.WithDefaultLoader();

            // NOTE(review): the loop condition is "i < pageIndex", so page 50 itself is
            // never requested - confirm that this is intended.
            int pageIndex = 50;

            // One WebClient serves every download and is disposed when the crawl ends,
            // instead of leaking a new undisposed client per image.
            using (WebClient mywebclient = new WebClient())
            {
                for (int i = 1; i < pageIndex; i++)
                {
                    // Listing page URL.
                    var address = @"https://www.qwe.com?pageIndex=" + i;
                    var pageUri = new Uri(address);

                    // Request the page (Main is synchronous, so block on the task once).
                    var document = BrowsingContext.New(config).OpenAsync(address).Result;

                    // Select the list items that hold the images.
                    var cells = document.QuerySelectorAll(".panel-body li");

                    // The page title becomes a folder name, so every character that is
                    // illegal in a file name is stripped (the original only removed "|").
                    var fileName = RemoveInvalidFileNameChars((i + " - " + document.Title).Replace("|", ""));
                    string localPath = "D:\\Image\\" + DateTime.Now.ToString("yyyMMdd") + "\\" + fileName;

                    foreach (var item in cells)
                    {
                        // Skip list items without an <img> or without a usable src.
                        var image = item.QuerySelector("img");
                        var imageUrl = image == null ? null : image.GetAttribute("src");
                        if (string.IsNullOrWhiteSpace(imageUrl))
                        {
                            continue;
                        }

                        // Resolve relative src values against the page address.
                        Uri imageUri;
                        if (!Uri.TryCreate(pageUri, imageUrl, out imageUri))
                        {
                            continue;
                        }

                        // Use only the last path segment as the file name, so a query
                        // string or fragment never ends up in the local path.
                        string imageName = RemoveInvalidFileNameChars(imageUri.AbsolutePath.Split('/').Last());
                        if (imageName.Length == 0)
                        {
                            continue;
                        }

                        // No-op when the directory already exists.
                        Directory.CreateDirectory(localPath);
                        string filepath = localPath + "\\" + imageName;

                        try
                        {
                            mywebclient.DownloadFile(imageUri, filepath);
                        }
                        catch (WebException ex)
                        {
                            // A single failing image must not abort the whole crawl.
                            Console.WriteLine("下載失敗: " + imageUri + " - " + ex.Message);
                        }
                        catch (NotSupportedException ex)
                        {
                            // Raised for URI schemes WebClient cannot fetch (e.g. data:).
                            Console.WriteLine("下載失敗: " + imageUri + " - " + ex.Message);
                        }
                    }
                }
            }

            // Keep the console window open until the user presses Enter.
            Console.ReadLine();
        }

        // Removes every character that is not allowed in a file or directory name
        // on the current platform.
        private static string RemoveInvalidFileNameChars(string name)
        {
            return string.Concat(name.Split(Path.GetInvalidFileNameChars()));
        }

        /// <summary>
        /// Downloads a file over HTTP to a local path. When the local file already
        /// exists and is shorter than the remote file, the download resumes from the
        /// end of the local file using an HTTP Range request.
        /// </summary>
        /// <param name="url">HTTP address of the remote file.</param>
        /// <param name="localfile">Path of the local file to write.</param>
        /// <returns>
        /// true when the download completed; false when the remote file is treated as
        /// missing, when the local file is already at least as long as the remote one,
        /// or when the transfer failed.
        /// </returns>
        public static bool Download(string url, string localfile)
        {
            long startPosition = 0; // Offset at which a resumed download continues.
            FileStream writeStream; // Stream writing to the local file.

            long remoteFileLength = GetHttpLength(url); // Length reported by the server.
            System.Console.WriteLine("remoteFileLength=" + remoteFileLength);

            // NOTE(review): 745 is presumably the byte size of the target site's
            // "not found" placeholder response - confirm against the actual site.
            if (remoteFileLength == 745)
            {
                System.Console.WriteLine("遠程文件不存在.");
                return false;
            }

            if (File.Exists(localfile))
            {
                // Open the partial file and continue after the bytes already on disk.
                writeStream = File.OpenWrite(localfile);
                startPosition = writeStream.Length;

                if (startPosition >= remoteFileLength)
                {
                    System.Console.WriteLine("本地文件長度" + startPosition + "已經大於等於遠程文件長度" + remoteFileLength);
                    writeStream.Close();

                    return false;
                }

                // Seek from the beginning so the result does not depend on the
                // stream's current position.
                writeStream.Seek(startPosition, SeekOrigin.Begin);
            }
            else
            {
                // The local file does not exist yet: create it.
                writeStream = new FileStream(localfile, FileMode.Create);
            }

            try
            {
                HttpWebRequest myRequest = (HttpWebRequest)WebRequest.Create(url);

                if (startPosition > 0)
                {
                    // Ask the server for the remaining bytes only. The long overload
                    // avoids truncating offsets beyond int.MaxValue.
                    myRequest.AddRange(startPosition);
                }

                using (HttpWebResponse response = (HttpWebResponse)myRequest.GetResponse())
                using (Stream readStream = response.GetResponseStream())
                {
                    if (startPosition > 0 && response.StatusCode != HttpStatusCode.PartialContent)
                    {
                        // The server ignored the Range header and is sending the whole
                        // file; restart from scratch instead of appending a full copy
                        // to the partial data.
                        writeStream.SetLength(0);
                        writeStream.Seek(0, SeekOrigin.Begin);
                        startPosition = 0;
                    }

                    byte[] btArray = new byte[512]; // Transfer buffer.
                    long currPostion = startPosition;

                    int contentSize = readStream.Read(btArray, 0, btArray.Length);
                    while (contentSize > 0) // Read returns 0 at the end of the stream.
                    {
                        currPostion += contentSize;

                        // The length is 0 or negative when it could not be determined;
                        // skip the progress report rather than divide by zero or print
                        // a negative percentage.
                        if (remoteFileLength > 0)
                        {
                            int percent = (int)(currPostion * 100 / remoteFileLength);
                            System.Console.WriteLine("percent=" + percent + "%");
                        }

                        writeStream.Write(btArray, 0, contentSize);
                        contentSize = readStream.Read(btArray, 0, btArray.Length);
                    }
                }

                return true;
            }
            catch (Exception ex)
            {
                // Callers only receive a bool, so report the reason here.
                System.Console.WriteLine("下載失敗: " + ex.Message);
                return false;
            }
            finally
            {
                // Always release the local file, on success and on failure.
                writeStream.Close();
            }
        }

        // Requests the URL and returns the length reported in the response headers
        // (HttpWebResponse.ContentLength). Returns 0 when the request throws or the
        // status code is anything other than 200 OK.
        private static long GetHttpLength(string url)
        {
            try
            {
                HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);

                // "using" releases the connection even if reading the response throws.
                using (HttpWebResponse rsp = (HttpWebResponse)req.GetResponse())
                {
                    return rsp.StatusCode == HttpStatusCode.OK ? rsp.ContentLength : 0;
                }
            }
            catch (Exception e)
            {
                // Keep the original "0 on failure" contract, but report why it failed
                // instead of swallowing the exception silently.
                System.Console.WriteLine("取得遠程文件長度失敗: " + e.Message);
                return 0;
            }
        }

    }
}

  


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM